snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +35 -40
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/identifier.py +74 -7
  5. snowflake/ml/_internal/utils/uri.py +7 -2
  6. snowflake/ml/model/_core_requirements.py +1 -1
  7. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  8. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  9. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  10. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  11. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  12. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  13. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  14. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  15. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  16. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  17. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  18. snowflake/ml/model/_deploy_client/warehouse/deploy.py +25 -28
  19. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +7 -4
  20. snowflake/ml/model/_deployer.py +14 -27
  21. snowflake/ml/model/_env.py +4 -4
  22. snowflake/ml/model/_handlers/_base.py +3 -1
  23. snowflake/ml/model/_handlers/custom.py +14 -2
  24. snowflake/ml/model/_handlers/pytorch.py +186 -0
  25. snowflake/ml/model/_handlers/sklearn.py +14 -8
  26. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  27. snowflake/ml/model/_handlers/torchscript.py +180 -0
  28. snowflake/ml/model/_handlers/xgboost.py +19 -9
  29. snowflake/ml/model/_model.py +27 -21
  30. snowflake/ml/model/_model_meta.py +33 -19
  31. snowflake/ml/model/model_signature.py +446 -66
  32. snowflake/ml/model/type_hints.py +28 -15
  33. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +79 -43
  34. snowflake/ml/modeling/cluster/affinity_propagation.py +79 -43
  35. snowflake/ml/modeling/cluster/agglomerative_clustering.py +79 -43
  36. snowflake/ml/modeling/cluster/birch.py +79 -43
  37. snowflake/ml/modeling/cluster/bisecting_k_means.py +79 -43
  38. snowflake/ml/modeling/cluster/dbscan.py +79 -43
  39. snowflake/ml/modeling/cluster/feature_agglomeration.py +79 -43
  40. snowflake/ml/modeling/cluster/k_means.py +79 -43
  41. snowflake/ml/modeling/cluster/mean_shift.py +79 -43
  42. snowflake/ml/modeling/cluster/mini_batch_k_means.py +79 -43
  43. snowflake/ml/modeling/cluster/optics.py +79 -43
  44. snowflake/ml/modeling/cluster/spectral_biclustering.py +79 -43
  45. snowflake/ml/modeling/cluster/spectral_clustering.py +79 -43
  46. snowflake/ml/modeling/cluster/spectral_coclustering.py +79 -43
  47. snowflake/ml/modeling/compose/column_transformer.py +79 -43
  48. snowflake/ml/modeling/compose/transformed_target_regressor.py +79 -43
  49. snowflake/ml/modeling/covariance/elliptic_envelope.py +79 -43
  50. snowflake/ml/modeling/covariance/empirical_covariance.py +79 -43
  51. snowflake/ml/modeling/covariance/graphical_lasso.py +79 -43
  52. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +79 -43
  53. snowflake/ml/modeling/covariance/ledoit_wolf.py +79 -43
  54. snowflake/ml/modeling/covariance/min_cov_det.py +79 -43
  55. snowflake/ml/modeling/covariance/oas.py +79 -43
  56. snowflake/ml/modeling/covariance/shrunk_covariance.py +79 -43
  57. snowflake/ml/modeling/decomposition/dictionary_learning.py +79 -43
  58. snowflake/ml/modeling/decomposition/factor_analysis.py +79 -43
  59. snowflake/ml/modeling/decomposition/fast_ica.py +79 -43
  60. snowflake/ml/modeling/decomposition/incremental_pca.py +79 -43
  61. snowflake/ml/modeling/decomposition/kernel_pca.py +79 -43
  62. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +79 -43
  63. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +79 -43
  64. snowflake/ml/modeling/decomposition/pca.py +79 -43
  65. snowflake/ml/modeling/decomposition/sparse_pca.py +79 -43
  66. snowflake/ml/modeling/decomposition/truncated_svd.py +79 -43
  67. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +79 -43
  68. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +79 -43
  69. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +79 -43
  70. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +79 -43
  71. snowflake/ml/modeling/ensemble/bagging_classifier.py +79 -43
  72. snowflake/ml/modeling/ensemble/bagging_regressor.py +79 -43
  73. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +79 -43
  74. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +79 -43
  75. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +79 -43
  76. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +79 -43
  77. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +79 -43
  78. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +79 -43
  79. snowflake/ml/modeling/ensemble/isolation_forest.py +79 -43
  80. snowflake/ml/modeling/ensemble/random_forest_classifier.py +79 -43
  81. snowflake/ml/modeling/ensemble/random_forest_regressor.py +79 -43
  82. snowflake/ml/modeling/ensemble/stacking_regressor.py +79 -43
  83. snowflake/ml/modeling/ensemble/voting_classifier.py +79 -43
  84. snowflake/ml/modeling/ensemble/voting_regressor.py +79 -43
  85. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +79 -43
  86. snowflake/ml/modeling/feature_selection/select_fdr.py +79 -43
  87. snowflake/ml/modeling/feature_selection/select_fpr.py +79 -43
  88. snowflake/ml/modeling/feature_selection/select_fwe.py +79 -43
  89. snowflake/ml/modeling/feature_selection/select_k_best.py +79 -43
  90. snowflake/ml/modeling/feature_selection/select_percentile.py +79 -43
  91. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +79 -43
  92. snowflake/ml/modeling/feature_selection/variance_threshold.py +79 -43
  93. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +79 -43
  94. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +79 -43
  95. snowflake/ml/modeling/impute/iterative_imputer.py +79 -43
  96. snowflake/ml/modeling/impute/knn_imputer.py +79 -43
  97. snowflake/ml/modeling/impute/missing_indicator.py +79 -43
  98. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +79 -43
  99. snowflake/ml/modeling/kernel_approximation/nystroem.py +79 -43
  100. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +79 -43
  101. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +79 -43
  102. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +79 -43
  103. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +79 -43
  104. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +79 -43
  105. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +79 -43
  106. snowflake/ml/modeling/linear_model/ard_regression.py +79 -43
  107. snowflake/ml/modeling/linear_model/bayesian_ridge.py +79 -43
  108. snowflake/ml/modeling/linear_model/elastic_net.py +79 -43
  109. snowflake/ml/modeling/linear_model/elastic_net_cv.py +79 -43
  110. snowflake/ml/modeling/linear_model/gamma_regressor.py +79 -43
  111. snowflake/ml/modeling/linear_model/huber_regressor.py +79 -43
  112. snowflake/ml/modeling/linear_model/lars.py +79 -43
  113. snowflake/ml/modeling/linear_model/lars_cv.py +79 -43
  114. snowflake/ml/modeling/linear_model/lasso.py +79 -43
  115. snowflake/ml/modeling/linear_model/lasso_cv.py +79 -43
  116. snowflake/ml/modeling/linear_model/lasso_lars.py +79 -43
  117. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +79 -43
  118. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +79 -43
  119. snowflake/ml/modeling/linear_model/linear_regression.py +79 -43
  120. snowflake/ml/modeling/linear_model/logistic_regression.py +79 -43
  121. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +79 -43
  122. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +79 -43
  123. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +79 -43
  124. snowflake/ml/modeling/linear_model/multi_task_lasso.py +79 -43
  125. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +79 -43
  126. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +79 -43
  127. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +79 -43
  128. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +79 -43
  129. snowflake/ml/modeling/linear_model/perceptron.py +79 -43
  130. snowflake/ml/modeling/linear_model/poisson_regressor.py +79 -43
  131. snowflake/ml/modeling/linear_model/ransac_regressor.py +79 -43
  132. snowflake/ml/modeling/linear_model/ridge.py +79 -43
  133. snowflake/ml/modeling/linear_model/ridge_classifier.py +79 -43
  134. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +79 -43
  135. snowflake/ml/modeling/linear_model/ridge_cv.py +79 -43
  136. snowflake/ml/modeling/linear_model/sgd_classifier.py +79 -43
  137. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +79 -43
  138. snowflake/ml/modeling/linear_model/sgd_regressor.py +79 -43
  139. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +79 -43
  140. snowflake/ml/modeling/linear_model/tweedie_regressor.py +79 -43
  141. snowflake/ml/modeling/manifold/isomap.py +79 -43
  142. snowflake/ml/modeling/manifold/mds.py +79 -43
  143. snowflake/ml/modeling/manifold/spectral_embedding.py +79 -43
  144. snowflake/ml/modeling/manifold/tsne.py +79 -43
  145. snowflake/ml/modeling/metrics/classification.py +6 -1
  146. snowflake/ml/modeling/metrics/regression.py +517 -9
  147. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +79 -43
  148. snowflake/ml/modeling/mixture/gaussian_mixture.py +79 -43
  149. snowflake/ml/modeling/model_selection/grid_search_cv.py +79 -43
  150. snowflake/ml/modeling/model_selection/randomized_search_cv.py +79 -43
  151. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +79 -43
  152. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +79 -43
  153. snowflake/ml/modeling/multiclass/output_code_classifier.py +79 -43
  154. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +79 -43
  155. snowflake/ml/modeling/naive_bayes/categorical_nb.py +79 -43
  156. snowflake/ml/modeling/naive_bayes/complement_nb.py +79 -43
  157. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +79 -43
  158. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +79 -43
  159. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +79 -43
  160. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +79 -43
  161. snowflake/ml/modeling/neighbors/kernel_density.py +79 -43
  162. snowflake/ml/modeling/neighbors/local_outlier_factor.py +79 -43
  163. snowflake/ml/modeling/neighbors/nearest_centroid.py +79 -43
  164. snowflake/ml/modeling/neighbors/nearest_neighbors.py +79 -43
  165. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +79 -43
  166. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +79 -43
  167. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +79 -43
  168. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +79 -43
  169. snowflake/ml/modeling/neural_network/mlp_classifier.py +79 -43
  170. snowflake/ml/modeling/neural_network/mlp_regressor.py +79 -43
  171. snowflake/ml/modeling/pipeline/pipeline.py +24 -0
  172. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
  173. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  174. snowflake/ml/modeling/preprocessing/polynomial_features.py +79 -43
  175. snowflake/ml/modeling/semi_supervised/label_propagation.py +79 -43
  176. snowflake/ml/modeling/semi_supervised/label_spreading.py +79 -43
  177. snowflake/ml/modeling/svm/linear_svc.py +79 -43
  178. snowflake/ml/modeling/svm/linear_svr.py +79 -43
  179. snowflake/ml/modeling/svm/nu_svc.py +79 -43
  180. snowflake/ml/modeling/svm/nu_svr.py +79 -43
  181. snowflake/ml/modeling/svm/svc.py +79 -43
  182. snowflake/ml/modeling/svm/svr.py +79 -43
  183. snowflake/ml/modeling/tree/decision_tree_classifier.py +79 -43
  184. snowflake/ml/modeling/tree/decision_tree_regressor.py +79 -43
  185. snowflake/ml/modeling/tree/extra_tree_classifier.py +79 -43
  186. snowflake/ml/modeling/tree/extra_tree_regressor.py +79 -43
  187. snowflake/ml/modeling/xgboost/xgb_classifier.py +79 -43
  188. snowflake/ml/modeling/xgboost/xgb_regressor.py +79 -43
  189. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +79 -43
  190. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +79 -43
  191. snowflake/ml/registry/model_registry.py +123 -121
  192. snowflake/ml/version.py +1 -1
  193. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +50 -8
  194. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  195. snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
  196. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
snowflake/ml/modeling/svm/nu_svc.py

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4

@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -272,7 +274,6 @@ class NuSVC(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])

         self._deps = list(deps)
@@ -306,6 +307,15 @@ class NuSVC(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)

+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
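
Note on this hunk: `self.id` was minted once in `__init__`, so every stage, sproc, and UDF created by one estimator instance shared the same suffix; `_get_rand_id()` draws a fresh UUID per call instead. A minimal standalone sketch of the naming behavior (standard library only; the `SNOWML_TRANSFORM_` prefix is taken from the hunks below):

    from uuid import uuid4

    def _get_rand_id() -> str:
        # Hyphens are not valid in unquoted Snowflake identifiers, so strip
        # them and upper-case the result, exactly as the added method does.
        return str(uuid4()).replace("-", "_").upper()

    # Every call yields a distinct suffix, so repeated fit() calls on one
    # estimator instance no longer reuse (and clobber) the same stage name.
    assert _get_rand_id() != _get_rand_id()
    stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=_get_rand_id())
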
@@ -384,7 +394,7 @@ class NuSVC(BaseTransformer):
         cp.dump(self._sklearn_object, local_transform_file)

         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -397,11 +407,12 @@ class NuSVC(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()

-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))

-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
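
Note on the `posixpath` switch: stage paths are always `/`-separated, while `os.path.join` follows the client OS and emits `\` on Windows, producing an invalid stage location. A small illustration; `ntpath` is used here only to simulate the Windows behavior of `os.path` on any platform:

    import ntpath      # the os.path implementation used on Windows
    import posixpath   # always joins with "/"

    stage = "SNOWML_TRANSFORM_ABC123"
    fname = "model.pkl"

    print(ntpath.join(stage, fname))     # SNOWML_TRANSFORM_ABC123\model.pkl -- broken stage path
    print(posixpath.join(stage, fname))  # SNOWML_TRANSFORM_ABC123/model.pkl -- valid from any client
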
@@ -427,6 +438,7 @@ class NuSVC(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -435,7 +447,8 @@ class NuSVC(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -502,15 +515,15 @@ class NuSVC(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )

         if "|" in sproc_export_file_name:
@@ -520,7 +533,7 @@ class NuSVC(BaseTransformer):
             print("\n".join(fields[1:]))

         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -566,7 +579,7 @@ class NuSVC(BaseTransformer):

         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)

         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -658,7 +671,7 @@ class NuSVC(BaseTransformer):
             return transformed_pandas_df.to_dict("records")

         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )

         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -714,26 +727,37 @@ class NuSVC(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
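
Note on this rewrite: the old code only compared the configured and unquoted spellings of each input column and always selected `dataset[input_cols]`; the new code also tries the quoted spelling (`identifier.get_escaped_names`) and selects whichever form actually exists in the pandas frame. A simplified, dependency-light sketch of the selection rule, with hypothetical column names and the quoting helpers stubbed as literals:

    import pandas as pd

    # A frame whose columns mix a quoted and an unquoted spelling.
    df = pd.DataFrame({'"sepal_len"': [5.1], "PETAL_LEN": [1.4]})

    input_cols = ['"sepal_len"', "PETAL_LEN"]   # as configured on the estimator
    unquoted = ["sepal_len", "PETAL_LEN"]       # stand-in for identifier.get_unescaped_names(...)
    quoted = ['"sepal_len"', '"PETAL_LEN"']     # stand-in for identifier.get_escaped_names(...)

    features_in_dataset = set(df.columns)
    columns_to_select = []
    for i in range(len(input_cols)):
        # Same precedence as the hunk: configured, then unquoted, then quoted.
        for candidate in (input_cols[i], unquoted[i], quoted[i]):
            if candidate in features_in_dataset:
                columns_to_select.append(candidate)
                break

    print(columns_to_select)  # ['"sepal_len"', 'PETAL_LEN']
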
@@ -814,11 +838,18 @@ class NuSVC(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
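
Note on the inferred output type: `predict` on a Snowpark DataFrame previously passed an empty `expected_output_cols_type`, leaving the prediction column untyped; the classifier now derives the Snowflake type from the stored `predict` signature via `convert_sp_to_sf_type`. A small sketch of the conversion step; the exact SQL spellings below are assumptions about Snowpark's internal type mapping:

    from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
    from snowflake.snowpark.types import DoubleType, LongType

    # An integer label column should come back as an integer prediction
    # column rather than a VARIANT, and a float label as a double.
    print(convert_sp_to_sf_type(LongType()))    # expected: BIGINT
    print(convert_sp_to_sf_type(DoubleType()))  # expected: DOUBLE
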
@@ -889,10 +920,10 @@ class NuSVC(BaseTransformer):

     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if current object is not a classifier or not yet fitted.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]

         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
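
Note on the `_get_output_column_names` change: returning `[output_cols_prefix]` instead of `[]` gives non-classifiers (and estimators exposing `decision_function` without `classes_`) exactly one named output column, so downstream signature building no longer sees an empty column list. A standalone sketch of the before/after behavior; the per-class naming is simplified but assumed to mirror the real scheme:

    from typing import List, Optional

    def get_output_column_names(classes: Optional[list], output_cols_prefix: str) -> List[str]:
        if classes is None:              # not a classifier, or not yet fitted
            return [output_cols_prefix]  # was: return []
        return [f"{output_cols_prefix}{c}" for c in classes]

    print(get_output_column_names(None, "decision_function_"))  # ['decision_function_']
    print(get_output_column_names([0, 1], "predict_proba_"))    # ['predict_proba_0', 'predict_proba_1']
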
@@ -1123,7 +1154,7 @@ class NuSVC(BaseTransformer):
         cp.dump(self._sklearn_object, local_score_file)

         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1137,8 +1168,9 @@ class NuSVC(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()

-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1164,6 +1196,7 @@ class NuSVC(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1171,7 +1204,8 @@ class NuSVC(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1221,14 +1255,14 @@ class NuSVC(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )

         cleanup_temp_files([local_score_file_name])
@@ -1246,18 +1280,20 @@ class NuSVC(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)

     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
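
Note on the signature change: every `ModelSignature` now prepends the input features to the declared outputs unless `drop_input_cols` is set, matching what batch inference actually returns (inputs echoed back plus predictions). A sketch with stand-in feature specs, using only the constructors visible in this diff:

    from snowflake.ml.model.model_signature import DataType, FeatureSpec, ModelSignature

    inputs = [FeatureSpec(dtype=DataType.DOUBLE, name="SEPAL_LEN")]
    outputs = [FeatureSpec(dtype=DataType.DOUBLE, name="OUTPUT_PRED")]

    drop_input_cols = False  # estimator setting; True would keep only the outputs
    sig = ModelSignature(inputs, ([] if drop_input_cols else inputs) + outputs)
    # Declared output columns: SEPAL_LEN, OUTPUT_PRED
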
snowflake/ml/modeling/svm/nu_svr.py

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4

@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -237,7 +239,6 @@ class NuSVR(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])

         self._deps = list(deps)
@@ -267,6 +268,15 @@ class NuSVR(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)

+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -345,7 +355,7 @@ class NuSVR(BaseTransformer):
         cp.dump(self._sklearn_object, local_transform_file)

         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -358,11 +368,12 @@ class NuSVR(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()

-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))

-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -388,6 +399,7 @@ class NuSVR(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -396,7 +408,8 @@ class NuSVR(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -463,15 +476,15 @@ class NuSVR(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )

         if "|" in sproc_export_file_name:
@@ -481,7 +494,7 @@ class NuSVR(BaseTransformer):
             print("\n".join(fields[1:]))

         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -527,7 +540,7 @@ class NuSVR(BaseTransformer):

         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)

         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -619,7 +632,7 @@ class NuSVR(BaseTransformer):
             return transformed_pandas_df.to_dict("records")

         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )

         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -675,26 +688,37 @@ class NuSVR(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -775,11 +799,18 @@ class NuSVR(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = "float"
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="float",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
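
One difference from the NuSVC version of this hunk: the regressor template seeds `expected_type_inferred = "float"`, so the `== ""` guard it shares with the classifier template can never fire here and the signature lookup is effectively dead code in NuSVR; the net change is only that the hard-coded "float" now flows through a variable. A condensed, runnable view of the two generated branches:

    def resolve_expected_type(is_regressor: bool, signature_type: str) -> str:
        # Mirrors the generated code: the regressor template pre-seeds "float",
        # so the signature-based branch only ever runs in classifier templates.
        expected_type_inferred = "float" if is_regressor else ""
        if expected_type_inferred == "":
            expected_type_inferred = signature_type  # e.g. convert_sp_to_sf_type(...)
        return expected_type_inferred

    print(resolve_expected_type(False, "BIGINT"))  # BIGINT -- classifier path, inferred
    print(resolve_expected_type(True, "BIGINT"))   # float  -- regressor path, guard never fires
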
@@ -850,10 +881,10 @@ class NuSVR(BaseTransformer):

     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if current object is not a classifier or not yet fitted.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]

         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1078,7 +1109,7 @@ class NuSVR(BaseTransformer):
         cp.dump(self._sklearn_object, local_score_file)

         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1092,8 +1123,9 @@ class NuSVR(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()

-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1119,6 +1151,7 @@ class NuSVR(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1126,7 +1159,8 @@ class NuSVR(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1176,14 +1210,14 @@ class NuSVR(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )

         cleanup_temp_files([local_score_file_name])
@@ -1201,18 +1235,20 @@ class NuSVR(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)

     @property
     def model_signatures(self) -> Dict[str, ModelSignature]: