snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
Files changed (196)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +35 -40
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/identifier.py +74 -7
  5. snowflake/ml/_internal/utils/uri.py +7 -2
  6. snowflake/ml/model/_core_requirements.py +1 -1
  7. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  8. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  9. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  10. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  11. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  12. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  13. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  14. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  15. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  16. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  17. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  18. snowflake/ml/model/_deploy_client/warehouse/deploy.py +25 -28
  19. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +7 -4
  20. snowflake/ml/model/_deployer.py +14 -27
  21. snowflake/ml/model/_env.py +4 -4
  22. snowflake/ml/model/_handlers/_base.py +3 -1
  23. snowflake/ml/model/_handlers/custom.py +14 -2
  24. snowflake/ml/model/_handlers/pytorch.py +186 -0
  25. snowflake/ml/model/_handlers/sklearn.py +14 -8
  26. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  27. snowflake/ml/model/_handlers/torchscript.py +180 -0
  28. snowflake/ml/model/_handlers/xgboost.py +19 -9
  29. snowflake/ml/model/_model.py +27 -21
  30. snowflake/ml/model/_model_meta.py +33 -19
  31. snowflake/ml/model/model_signature.py +446 -66
  32. snowflake/ml/model/type_hints.py +28 -15
  33. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +79 -43
  34. snowflake/ml/modeling/cluster/affinity_propagation.py +79 -43
  35. snowflake/ml/modeling/cluster/agglomerative_clustering.py +79 -43
  36. snowflake/ml/modeling/cluster/birch.py +79 -43
  37. snowflake/ml/modeling/cluster/bisecting_k_means.py +79 -43
  38. snowflake/ml/modeling/cluster/dbscan.py +79 -43
  39. snowflake/ml/modeling/cluster/feature_agglomeration.py +79 -43
  40. snowflake/ml/modeling/cluster/k_means.py +79 -43
  41. snowflake/ml/modeling/cluster/mean_shift.py +79 -43
  42. snowflake/ml/modeling/cluster/mini_batch_k_means.py +79 -43
  43. snowflake/ml/modeling/cluster/optics.py +79 -43
  44. snowflake/ml/modeling/cluster/spectral_biclustering.py +79 -43
  45. snowflake/ml/modeling/cluster/spectral_clustering.py +79 -43
  46. snowflake/ml/modeling/cluster/spectral_coclustering.py +79 -43
  47. snowflake/ml/modeling/compose/column_transformer.py +79 -43
  48. snowflake/ml/modeling/compose/transformed_target_regressor.py +79 -43
  49. snowflake/ml/modeling/covariance/elliptic_envelope.py +79 -43
  50. snowflake/ml/modeling/covariance/empirical_covariance.py +79 -43
  51. snowflake/ml/modeling/covariance/graphical_lasso.py +79 -43
  52. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +79 -43
  53. snowflake/ml/modeling/covariance/ledoit_wolf.py +79 -43
  54. snowflake/ml/modeling/covariance/min_cov_det.py +79 -43
  55. snowflake/ml/modeling/covariance/oas.py +79 -43
  56. snowflake/ml/modeling/covariance/shrunk_covariance.py +79 -43
  57. snowflake/ml/modeling/decomposition/dictionary_learning.py +79 -43
  58. snowflake/ml/modeling/decomposition/factor_analysis.py +79 -43
  59. snowflake/ml/modeling/decomposition/fast_ica.py +79 -43
  60. snowflake/ml/modeling/decomposition/incremental_pca.py +79 -43
  61. snowflake/ml/modeling/decomposition/kernel_pca.py +79 -43
  62. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +79 -43
  63. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +79 -43
  64. snowflake/ml/modeling/decomposition/pca.py +79 -43
  65. snowflake/ml/modeling/decomposition/sparse_pca.py +79 -43
  66. snowflake/ml/modeling/decomposition/truncated_svd.py +79 -43
  67. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +79 -43
  68. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +79 -43
  69. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +79 -43
  70. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +79 -43
  71. snowflake/ml/modeling/ensemble/bagging_classifier.py +79 -43
  72. snowflake/ml/modeling/ensemble/bagging_regressor.py +79 -43
  73. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +79 -43
  74. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +79 -43
  75. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +79 -43
  76. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +79 -43
  77. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +79 -43
  78. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +79 -43
  79. snowflake/ml/modeling/ensemble/isolation_forest.py +79 -43
  80. snowflake/ml/modeling/ensemble/random_forest_classifier.py +79 -43
  81. snowflake/ml/modeling/ensemble/random_forest_regressor.py +79 -43
  82. snowflake/ml/modeling/ensemble/stacking_regressor.py +79 -43
  83. snowflake/ml/modeling/ensemble/voting_classifier.py +79 -43
  84. snowflake/ml/modeling/ensemble/voting_regressor.py +79 -43
  85. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +79 -43
  86. snowflake/ml/modeling/feature_selection/select_fdr.py +79 -43
  87. snowflake/ml/modeling/feature_selection/select_fpr.py +79 -43
  88. snowflake/ml/modeling/feature_selection/select_fwe.py +79 -43
  89. snowflake/ml/modeling/feature_selection/select_k_best.py +79 -43
  90. snowflake/ml/modeling/feature_selection/select_percentile.py +79 -43
  91. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +79 -43
  92. snowflake/ml/modeling/feature_selection/variance_threshold.py +79 -43
  93. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +79 -43
  94. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +79 -43
  95. snowflake/ml/modeling/impute/iterative_imputer.py +79 -43
  96. snowflake/ml/modeling/impute/knn_imputer.py +79 -43
  97. snowflake/ml/modeling/impute/missing_indicator.py +79 -43
  98. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +79 -43
  99. snowflake/ml/modeling/kernel_approximation/nystroem.py +79 -43
  100. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +79 -43
  101. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +79 -43
  102. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +79 -43
  103. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +79 -43
  104. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +79 -43
  105. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +79 -43
  106. snowflake/ml/modeling/linear_model/ard_regression.py +79 -43
  107. snowflake/ml/modeling/linear_model/bayesian_ridge.py +79 -43
  108. snowflake/ml/modeling/linear_model/elastic_net.py +79 -43
  109. snowflake/ml/modeling/linear_model/elastic_net_cv.py +79 -43
  110. snowflake/ml/modeling/linear_model/gamma_regressor.py +79 -43
  111. snowflake/ml/modeling/linear_model/huber_regressor.py +79 -43
  112. snowflake/ml/modeling/linear_model/lars.py +79 -43
  113. snowflake/ml/modeling/linear_model/lars_cv.py +79 -43
  114. snowflake/ml/modeling/linear_model/lasso.py +79 -43
  115. snowflake/ml/modeling/linear_model/lasso_cv.py +79 -43
  116. snowflake/ml/modeling/linear_model/lasso_lars.py +79 -43
  117. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +79 -43
  118. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +79 -43
  119. snowflake/ml/modeling/linear_model/linear_regression.py +79 -43
  120. snowflake/ml/modeling/linear_model/logistic_regression.py +79 -43
  121. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +79 -43
  122. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +79 -43
  123. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +79 -43
  124. snowflake/ml/modeling/linear_model/multi_task_lasso.py +79 -43
  125. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +79 -43
  126. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +79 -43
  127. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +79 -43
  128. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +79 -43
  129. snowflake/ml/modeling/linear_model/perceptron.py +79 -43
  130. snowflake/ml/modeling/linear_model/poisson_regressor.py +79 -43
  131. snowflake/ml/modeling/linear_model/ransac_regressor.py +79 -43
  132. snowflake/ml/modeling/linear_model/ridge.py +79 -43
  133. snowflake/ml/modeling/linear_model/ridge_classifier.py +79 -43
  134. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +79 -43
  135. snowflake/ml/modeling/linear_model/ridge_cv.py +79 -43
  136. snowflake/ml/modeling/linear_model/sgd_classifier.py +79 -43
  137. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +79 -43
  138. snowflake/ml/modeling/linear_model/sgd_regressor.py +79 -43
  139. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +79 -43
  140. snowflake/ml/modeling/linear_model/tweedie_regressor.py +79 -43
  141. snowflake/ml/modeling/manifold/isomap.py +79 -43
  142. snowflake/ml/modeling/manifold/mds.py +79 -43
  143. snowflake/ml/modeling/manifold/spectral_embedding.py +79 -43
  144. snowflake/ml/modeling/manifold/tsne.py +79 -43
  145. snowflake/ml/modeling/metrics/classification.py +6 -1
  146. snowflake/ml/modeling/metrics/regression.py +517 -9
  147. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +79 -43
  148. snowflake/ml/modeling/mixture/gaussian_mixture.py +79 -43
  149. snowflake/ml/modeling/model_selection/grid_search_cv.py +79 -43
  150. snowflake/ml/modeling/model_selection/randomized_search_cv.py +79 -43
  151. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +79 -43
  152. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +79 -43
  153. snowflake/ml/modeling/multiclass/output_code_classifier.py +79 -43
  154. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +79 -43
  155. snowflake/ml/modeling/naive_bayes/categorical_nb.py +79 -43
  156. snowflake/ml/modeling/naive_bayes/complement_nb.py +79 -43
  157. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +79 -43
  158. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +79 -43
  159. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +79 -43
  160. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +79 -43
  161. snowflake/ml/modeling/neighbors/kernel_density.py +79 -43
  162. snowflake/ml/modeling/neighbors/local_outlier_factor.py +79 -43
  163. snowflake/ml/modeling/neighbors/nearest_centroid.py +79 -43
  164. snowflake/ml/modeling/neighbors/nearest_neighbors.py +79 -43
  165. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +79 -43
  166. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +79 -43
  167. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +79 -43
  168. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +79 -43
  169. snowflake/ml/modeling/neural_network/mlp_classifier.py +79 -43
  170. snowflake/ml/modeling/neural_network/mlp_regressor.py +79 -43
  171. snowflake/ml/modeling/pipeline/pipeline.py +24 -0
  172. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
  173. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  174. snowflake/ml/modeling/preprocessing/polynomial_features.py +79 -43
  175. snowflake/ml/modeling/semi_supervised/label_propagation.py +79 -43
  176. snowflake/ml/modeling/semi_supervised/label_spreading.py +79 -43
  177. snowflake/ml/modeling/svm/linear_svc.py +79 -43
  178. snowflake/ml/modeling/svm/linear_svr.py +79 -43
  179. snowflake/ml/modeling/svm/nu_svc.py +79 -43
  180. snowflake/ml/modeling/svm/nu_svr.py +79 -43
  181. snowflake/ml/modeling/svm/svc.py +79 -43
  182. snowflake/ml/modeling/svm/svr.py +79 -43
  183. snowflake/ml/modeling/tree/decision_tree_classifier.py +79 -43
  184. snowflake/ml/modeling/tree/decision_tree_regressor.py +79 -43
  185. snowflake/ml/modeling/tree/extra_tree_classifier.py +79 -43
  186. snowflake/ml/modeling/tree/extra_tree_regressor.py +79 -43
  187. snowflake/ml/modeling/xgboost/xgb_classifier.py +79 -43
  188. snowflake/ml/modeling/xgboost/xgb_regressor.py +79 -43
  189. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +79 -43
  190. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +79 -43
  191. snowflake/ml/registry/model_registry.py +123 -121
  192. snowflake/ml/version.py +1 -1
  193. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +50 -8
  194. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  195. snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
  196. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -262,7 +264,6 @@ class PassiveAggressiveRegressor(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -295,6 +296,15 @@ class PassiveAggressiveRegressor(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
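In 1.0.1 each estimator instance cached a single UUID-derived `self.id` at construction time, so every stage, sproc, and table name created by that instance reused it; 1.0.3 removes the attribute and has `_get_rand_id()` draw a fresh id at every call site. A standalone sketch of the expression (identical to the method body above):

    from uuid import uuid4

    def get_rand_id() -> str:
        # Hyphens are not valid in unquoted Snowflake identifiers, so they
        # are replaced with underscores and the result is upper-cased.
        return str(uuid4()).replace("-", "_").upper()

    print(get_rand_id())  # e.g. 1B9D6BCD_BBFD_4B2D_9B5D_AB8DFBBD4BED

Because each call yields a new id, names such as SNOWML_TRANSFORM_{safe_id} no longer repeat across successive fit() calls on the same instance.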
@@ -373,7 +383,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -386,11 +396,12 @@ class PassiveAggressiveRegressor(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
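The os.path.join → posixpath.join change in this hunk matters for Windows clients: there os.path is ntpath and joins with backslashes, which is not a valid separator in a Snowflake stage location. A quick illustration that runs on any platform (the stage and file names are made up):

    import ntpath      # the os.path implementation used on Windows
    import posixpath   # always joins with forward slashes

    stage_name = "SNOWML_TRANSFORM_ABC"  # illustrative
    file_name = "model.pkl"              # illustrative

    print(ntpath.join(stage_name, file_name))    # SNOWML_TRANSFORM_ABC\model.pkl  -- broken stage path
    print(posixpath.join(stage_name, file_name)) # SNOWML_TRANSFORM_ABC/model.pkl  -- valid stage path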
@@ -416,6 +427,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -424,7 +436,8 @@ class PassiveAggressiveRegressor(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -491,15 +504,15 @@ class PassiveAggressiveRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -509,7 +522,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -555,7 +568,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -647,7 +660,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -703,26 +716,37 @@ class PassiveAggressiveRegressor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
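The rewritten block above replaces a two-branch rename with a lookup that tries each input column under three spellings: as given, unquoted, and quoted. The motivation is Snowflake identifier folding: unquoted identifiers are stored upper-case, while quoted identifiers keep their exact spelling, so a pandas frame handed to _sklearn_inference may carry either form. A toy version of the candidate search (resolve is a hypothetical helper, not package code):

    import pandas as pd

    df = pd.DataFrame({'"myFeature"': [1.0], "OTHER": [2.0]})

    def resolve(col: str, frame: pd.DataFrame) -> str:
        # Try the name as given, its unquoted (upper-cased) form, and its
        # quoted form -- the same three candidates the loop above considers.
        for candidate in (col, col.upper(), f'"{col}"'):
            if candidate in frame.columns:
                return candidate
        raise KeyError(col)

    print(resolve("myFeature", df))  # '"myFeature"' (quoted form matched)
    print(resolve("other", df))      # 'OTHER' (unquoted form matched)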
@@ -803,11 +827,18 @@ class PassiveAggressiveRegressor(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = "float"
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="float",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -878,10 +909,10 @@ class PassiveAggressiveRegressor(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if current object is not a classifier or not yet fitted.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
        """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
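The _get_output_column_names change gives non-classifiers a usable fallback: instead of an empty list, they now report a single output column named after the prefix, so the signature-building loop over PROB_FUNCTIONS below always has at least one column to describe. For classifiers the method still derives one column per fitted class; a sketch of that naming idea (the exact suffix construction is in the code truncated after the isinstance check, so treat this as an assumption):

    import numpy as np

    classes = np.array([0, 1, 2])   # classes_ of a fitted classifier
    prefix = "predict_proba_"

    print([f"{prefix}{c}" for c in classes])  # assumed per-class scheme: ['predict_proba_0', ...]
    print([prefix])                           # new non-classifier fallback: ['predict_proba_']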
@@ -1106,7 +1137,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1120,8 +1151,9 @@ class PassiveAggressiveRegressor(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1147,6 +1179,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1154,7 +1187,8 @@ class PassiveAggressiveRegressor(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1204,14 +1238,14 @@ class PassiveAggressiveRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1229,18 +1263,20 @@ class PassiveAggressiveRegressor(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
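All three signature hunks at the end of this file make the same adjustment: unless drop_input_cols is set, the recorded output schema now lists the pass-through input columns ahead of the prediction columns, matching what batch inference actually returns. A condensed sketch using the classes imported at the top of the file:

    from snowflake.ml.model.model_signature import DataType, FeatureSpec, ModelSignature

    inputs = [FeatureSpec(dtype=DataType.DOUBLE, name="FEATURE_0")]
    outputs = [FeatureSpec(dtype=DataType.DOUBLE, name="OUTPUT_0")]

    for drop_input_cols in (False, True):
        # Same expression as in the diff: inputs are prepended unless dropped.
        sig = ModelSignature(inputs, ([] if drop_input_cols else inputs) + outputs)
        print(drop_input_cols, [f.name for f in sig.outputs])
    # False ['FEATURE_0', 'OUTPUT_0']
    # True  ['OUTPUT_0']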
snowflake/ml/modeling/linear_model/perceptron.py

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -273,7 +275,6 @@ class Perceptron(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -308,6 +309,15 @@ class Perceptron(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -386,7 +396,7 @@ class Perceptron(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -399,11 +409,12 @@ class Perceptron(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -429,6 +440,7 @@ class Perceptron(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -437,7 +449,8 @@ class Perceptron(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -504,15 +517,15 @@ class Perceptron(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -522,7 +535,7 @@ class Perceptron(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -568,7 +581,7 @@ class Perceptron(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -660,7 +673,7 @@ class Perceptron(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -716,26 +729,37 @@ class Perceptron(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -816,11 +840,18 @@ class Perceptron(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
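Unlike PassiveAggressiveRegressor above, which pins expected_type_inferred to "float", Perceptron.predict starts from an empty string, so the new branch actually runs: the Snowpark type of the first output in the recorded predict signature is translated to a Snowflake SQL type name by the internal convert_sp_to_sf_type helper imported at the top of the file. Roughly (the printed string is indicative; the exact mapping lives in Snowpark internals):

    from snowflake.snowpark.types import DoubleType
    from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

    # Translate a Snowpark DataType into the SQL type name used for the
    # UDF's output column; internal API, so subject to change.
    print(convert_sp_to_sf_type(DoubleType()))  # e.g. "DOUBLE"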
@@ -891,10 +922,10 @@ class Perceptron(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if current object is not a classifier or not yet fitted.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1121,7 +1152,7 @@ class Perceptron(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1135,8 +1166,9 @@ class Perceptron(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1162,6 +1194,7 @@ class Perceptron(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1169,7 +1202,8 @@ class Perceptron(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1219,14 +1253,14 @@ class Perceptron(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1244,18 +1278,20 @@ class Perceptron(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]: