snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (196)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +35 -40
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/identifier.py +74 -7
  5. snowflake/ml/_internal/utils/uri.py +7 -2
  6. snowflake/ml/model/_core_requirements.py +1 -1
  7. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  8. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  9. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  10. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  11. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  12. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  13. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  14. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  15. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  16. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  17. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  18. snowflake/ml/model/_deploy_client/warehouse/deploy.py +25 -28
  19. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +7 -4
  20. snowflake/ml/model/_deployer.py +14 -27
  21. snowflake/ml/model/_env.py +4 -4
  22. snowflake/ml/model/_handlers/_base.py +3 -1
  23. snowflake/ml/model/_handlers/custom.py +14 -2
  24. snowflake/ml/model/_handlers/pytorch.py +186 -0
  25. snowflake/ml/model/_handlers/sklearn.py +14 -8
  26. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  27. snowflake/ml/model/_handlers/torchscript.py +180 -0
  28. snowflake/ml/model/_handlers/xgboost.py +19 -9
  29. snowflake/ml/model/_model.py +27 -21
  30. snowflake/ml/model/_model_meta.py +33 -19
  31. snowflake/ml/model/model_signature.py +446 -66
  32. snowflake/ml/model/type_hints.py +28 -15
  33. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +79 -43
  34. snowflake/ml/modeling/cluster/affinity_propagation.py +79 -43
  35. snowflake/ml/modeling/cluster/agglomerative_clustering.py +79 -43
  36. snowflake/ml/modeling/cluster/birch.py +79 -43
  37. snowflake/ml/modeling/cluster/bisecting_k_means.py +79 -43
  38. snowflake/ml/modeling/cluster/dbscan.py +79 -43
  39. snowflake/ml/modeling/cluster/feature_agglomeration.py +79 -43
  40. snowflake/ml/modeling/cluster/k_means.py +79 -43
  41. snowflake/ml/modeling/cluster/mean_shift.py +79 -43
  42. snowflake/ml/modeling/cluster/mini_batch_k_means.py +79 -43
  43. snowflake/ml/modeling/cluster/optics.py +79 -43
  44. snowflake/ml/modeling/cluster/spectral_biclustering.py +79 -43
  45. snowflake/ml/modeling/cluster/spectral_clustering.py +79 -43
  46. snowflake/ml/modeling/cluster/spectral_coclustering.py +79 -43
  47. snowflake/ml/modeling/compose/column_transformer.py +79 -43
  48. snowflake/ml/modeling/compose/transformed_target_regressor.py +79 -43
  49. snowflake/ml/modeling/covariance/elliptic_envelope.py +79 -43
  50. snowflake/ml/modeling/covariance/empirical_covariance.py +79 -43
  51. snowflake/ml/modeling/covariance/graphical_lasso.py +79 -43
  52. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +79 -43
  53. snowflake/ml/modeling/covariance/ledoit_wolf.py +79 -43
  54. snowflake/ml/modeling/covariance/min_cov_det.py +79 -43
  55. snowflake/ml/modeling/covariance/oas.py +79 -43
  56. snowflake/ml/modeling/covariance/shrunk_covariance.py +79 -43
  57. snowflake/ml/modeling/decomposition/dictionary_learning.py +79 -43
  58. snowflake/ml/modeling/decomposition/factor_analysis.py +79 -43
  59. snowflake/ml/modeling/decomposition/fast_ica.py +79 -43
  60. snowflake/ml/modeling/decomposition/incremental_pca.py +79 -43
  61. snowflake/ml/modeling/decomposition/kernel_pca.py +79 -43
  62. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +79 -43
  63. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +79 -43
  64. snowflake/ml/modeling/decomposition/pca.py +79 -43
  65. snowflake/ml/modeling/decomposition/sparse_pca.py +79 -43
  66. snowflake/ml/modeling/decomposition/truncated_svd.py +79 -43
  67. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +79 -43
  68. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +79 -43
  69. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +79 -43
  70. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +79 -43
  71. snowflake/ml/modeling/ensemble/bagging_classifier.py +79 -43
  72. snowflake/ml/modeling/ensemble/bagging_regressor.py +79 -43
  73. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +79 -43
  74. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +79 -43
  75. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +79 -43
  76. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +79 -43
  77. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +79 -43
  78. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +79 -43
  79. snowflake/ml/modeling/ensemble/isolation_forest.py +79 -43
  80. snowflake/ml/modeling/ensemble/random_forest_classifier.py +79 -43
  81. snowflake/ml/modeling/ensemble/random_forest_regressor.py +79 -43
  82. snowflake/ml/modeling/ensemble/stacking_regressor.py +79 -43
  83. snowflake/ml/modeling/ensemble/voting_classifier.py +79 -43
  84. snowflake/ml/modeling/ensemble/voting_regressor.py +79 -43
  85. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +79 -43
  86. snowflake/ml/modeling/feature_selection/select_fdr.py +79 -43
  87. snowflake/ml/modeling/feature_selection/select_fpr.py +79 -43
  88. snowflake/ml/modeling/feature_selection/select_fwe.py +79 -43
  89. snowflake/ml/modeling/feature_selection/select_k_best.py +79 -43
  90. snowflake/ml/modeling/feature_selection/select_percentile.py +79 -43
  91. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +79 -43
  92. snowflake/ml/modeling/feature_selection/variance_threshold.py +79 -43
  93. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +79 -43
  94. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +79 -43
  95. snowflake/ml/modeling/impute/iterative_imputer.py +79 -43
  96. snowflake/ml/modeling/impute/knn_imputer.py +79 -43
  97. snowflake/ml/modeling/impute/missing_indicator.py +79 -43
  98. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +79 -43
  99. snowflake/ml/modeling/kernel_approximation/nystroem.py +79 -43
  100. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +79 -43
  101. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +79 -43
  102. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +79 -43
  103. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +79 -43
  104. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +79 -43
  105. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +79 -43
  106. snowflake/ml/modeling/linear_model/ard_regression.py +79 -43
  107. snowflake/ml/modeling/linear_model/bayesian_ridge.py +79 -43
  108. snowflake/ml/modeling/linear_model/elastic_net.py +79 -43
  109. snowflake/ml/modeling/linear_model/elastic_net_cv.py +79 -43
  110. snowflake/ml/modeling/linear_model/gamma_regressor.py +79 -43
  111. snowflake/ml/modeling/linear_model/huber_regressor.py +79 -43
  112. snowflake/ml/modeling/linear_model/lars.py +79 -43
  113. snowflake/ml/modeling/linear_model/lars_cv.py +79 -43
  114. snowflake/ml/modeling/linear_model/lasso.py +79 -43
  115. snowflake/ml/modeling/linear_model/lasso_cv.py +79 -43
  116. snowflake/ml/modeling/linear_model/lasso_lars.py +79 -43
  117. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +79 -43
  118. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +79 -43
  119. snowflake/ml/modeling/linear_model/linear_regression.py +79 -43
  120. snowflake/ml/modeling/linear_model/logistic_regression.py +79 -43
  121. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +79 -43
  122. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +79 -43
  123. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +79 -43
  124. snowflake/ml/modeling/linear_model/multi_task_lasso.py +79 -43
  125. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +79 -43
  126. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +79 -43
  127. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +79 -43
  128. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +79 -43
  129. snowflake/ml/modeling/linear_model/perceptron.py +79 -43
  130. snowflake/ml/modeling/linear_model/poisson_regressor.py +79 -43
  131. snowflake/ml/modeling/linear_model/ransac_regressor.py +79 -43
  132. snowflake/ml/modeling/linear_model/ridge.py +79 -43
  133. snowflake/ml/modeling/linear_model/ridge_classifier.py +79 -43
  134. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +79 -43
  135. snowflake/ml/modeling/linear_model/ridge_cv.py +79 -43
  136. snowflake/ml/modeling/linear_model/sgd_classifier.py +79 -43
  137. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +79 -43
  138. snowflake/ml/modeling/linear_model/sgd_regressor.py +79 -43
  139. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +79 -43
  140. snowflake/ml/modeling/linear_model/tweedie_regressor.py +79 -43
  141. snowflake/ml/modeling/manifold/isomap.py +79 -43
  142. snowflake/ml/modeling/manifold/mds.py +79 -43
  143. snowflake/ml/modeling/manifold/spectral_embedding.py +79 -43
  144. snowflake/ml/modeling/manifold/tsne.py +79 -43
  145. snowflake/ml/modeling/metrics/classification.py +6 -1
  146. snowflake/ml/modeling/metrics/regression.py +517 -9
  147. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +79 -43
  148. snowflake/ml/modeling/mixture/gaussian_mixture.py +79 -43
  149. snowflake/ml/modeling/model_selection/grid_search_cv.py +79 -43
  150. snowflake/ml/modeling/model_selection/randomized_search_cv.py +79 -43
  151. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +79 -43
  152. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +79 -43
  153. snowflake/ml/modeling/multiclass/output_code_classifier.py +79 -43
  154. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +79 -43
  155. snowflake/ml/modeling/naive_bayes/categorical_nb.py +79 -43
  156. snowflake/ml/modeling/naive_bayes/complement_nb.py +79 -43
  157. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +79 -43
  158. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +79 -43
  159. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +79 -43
  160. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +79 -43
  161. snowflake/ml/modeling/neighbors/kernel_density.py +79 -43
  162. snowflake/ml/modeling/neighbors/local_outlier_factor.py +79 -43
  163. snowflake/ml/modeling/neighbors/nearest_centroid.py +79 -43
  164. snowflake/ml/modeling/neighbors/nearest_neighbors.py +79 -43
  165. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +79 -43
  166. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +79 -43
  167. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +79 -43
  168. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +79 -43
  169. snowflake/ml/modeling/neural_network/mlp_classifier.py +79 -43
  170. snowflake/ml/modeling/neural_network/mlp_regressor.py +79 -43
  171. snowflake/ml/modeling/pipeline/pipeline.py +24 -0
  172. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
  173. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  174. snowflake/ml/modeling/preprocessing/polynomial_features.py +79 -43
  175. snowflake/ml/modeling/semi_supervised/label_propagation.py +79 -43
  176. snowflake/ml/modeling/semi_supervised/label_spreading.py +79 -43
  177. snowflake/ml/modeling/svm/linear_svc.py +79 -43
  178. snowflake/ml/modeling/svm/linear_svr.py +79 -43
  179. snowflake/ml/modeling/svm/nu_svc.py +79 -43
  180. snowflake/ml/modeling/svm/nu_svr.py +79 -43
  181. snowflake/ml/modeling/svm/svc.py +79 -43
  182. snowflake/ml/modeling/svm/svr.py +79 -43
  183. snowflake/ml/modeling/tree/decision_tree_classifier.py +79 -43
  184. snowflake/ml/modeling/tree/decision_tree_regressor.py +79 -43
  185. snowflake/ml/modeling/tree/extra_tree_classifier.py +79 -43
  186. snowflake/ml/modeling/tree/extra_tree_regressor.py +79 -43
  187. snowflake/ml/modeling/xgboost/xgb_classifier.py +79 -43
  188. snowflake/ml/modeling/xgboost/xgb_regressor.py +79 -43
  189. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +79 -43
  190. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +79 -43
  191. snowflake/ml/registry/model_registry.py +123 -121
  192. snowflake/ml/version.py +1 -1
  193. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +50 -8
  194. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  195. snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
  196. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -302,7 +304,6 @@ class TSNE(BaseTransformer):
302
304
  sample_weight_col: Optional[str] = None,
303
305
  ) -> None:
304
306
  super().__init__()
305
- self.id = str(uuid4()).replace("-", "_").upper()
306
307
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
307
308
 
308
309
  self._deps = list(deps)
@@ -337,6 +338,15 @@ class TSNE(BaseTransformer):
337
338
  self.set_drop_input_cols(drop_input_cols)
338
339
  self.set_sample_weight_col(sample_weight_col)
339
340
 
341
+ def _get_rand_id(self) -> str:
342
+ """
343
+ Generate random id to be used in sproc and stage names.
344
+
345
+ Returns:
346
+ Random id string usable in sproc, table, and stage names.
347
+ """
348
+ return str(uuid4()).replace("-", "_").upper()
349
+
340
350
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
341
351
  """
342
352
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -415,7 +425,7 @@ class TSNE(BaseTransformer):
415
425
  cp.dump(self._sklearn_object, local_transform_file)
416
426
 
417
427
  # Create temp stage to run fit.
418
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
428
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
419
429
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
420
430
  SqlResultValidator(
421
431
  session=session,
@@ -428,11 +438,12 @@ class TSNE(BaseTransformer):
428
438
  expected_value=f"Stage area {transform_stage_name} successfully created."
429
439
  ).validate()
430
440
 
431
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
441
+ # Use posixpath to construct stage paths
442
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
443
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
432
444
  local_result_file_name = get_temp_file_path()
433
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
434
445
 
435
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
446
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
436
447
  statement_params = telemetry.get_function_usage_statement_params(
437
448
  project=_PROJECT,
438
449
  subproject=_SUBPROJECT,
@@ -458,6 +469,7 @@ class TSNE(BaseTransformer):
458
469
  replace=True,
459
470
  session=session,
460
471
  statement_params=statement_params,
472
+ anonymous=True
461
473
  )
462
474
  def fit_wrapper_sproc(
463
475
  session: Session,
@@ -466,7 +478,8 @@ class TSNE(BaseTransformer):
466
478
  stage_result_file_name: str,
467
479
  input_cols: List[str],
468
480
  label_cols: List[str],
469
- sample_weight_col: Optional[str]
481
+ sample_weight_col: Optional[str],
482
+ statement_params: Dict[str, str]
470
483
  ) -> str:
471
484
  import cloudpickle as cp
472
485
  import numpy as np
@@ -533,15 +546,15 @@ class TSNE(BaseTransformer):
533
546
  api_calls=[Session.call],
534
547
  custom_tags=dict([("autogen", True)]),
535
548
  )
536
- sproc_export_file_name = session.call(
537
- fit_sproc_name,
549
+ sproc_export_file_name = fit_wrapper_sproc(
550
+ session,
538
551
  query,
539
552
  stage_transform_file_name,
540
553
  stage_result_file_name,
541
554
  identifier.get_unescaped_names(self.input_cols),
542
555
  identifier.get_unescaped_names(self.label_cols),
543
556
  identifier.get_unescaped_names(self.sample_weight_col),
544
- statement_params=statement_params,
557
+ statement_params,
545
558
  )
546
559
 
547
560
  if "|" in sproc_export_file_name:
@@ -551,7 +564,7 @@ class TSNE(BaseTransformer):
551
564
  print("\n".join(fields[1:]))
552
565
 
553
566
  session.file.get(
554
- os.path.join(stage_result_file_name, sproc_export_file_name),
567
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
555
568
  local_result_file_name,
556
569
  statement_params=statement_params
557
570
  )
@@ -597,7 +610,7 @@ class TSNE(BaseTransformer):
597
610
 
598
611
  # Register vectorized UDF for batch inference
599
612
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
600
- safe_id=self.id, method=inference_method)
613
+ safe_id=self._get_rand_id(), method=inference_method)
601
614
 
602
615
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
603
616
  # will try to pickle all of self which fails.
@@ -689,7 +702,7 @@ class TSNE(BaseTransformer):
689
702
  return transformed_pandas_df.to_dict("records")
690
703
 
691
704
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
692
- safe_id=self.id
705
+ safe_id=self._get_rand_id()
693
706
  )
694
707
 
695
708
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -745,26 +758,37 @@ class TSNE(BaseTransformer):
745
758
  # input cols need to match unquoted / quoted
746
759
  input_cols = self.input_cols
747
760
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
761
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
748
762
 
749
763
  estimator = self._sklearn_object
750
764
 
751
- input_df = dataset[input_cols] # Select input columns with quoted column names.
752
- if hasattr(estimator, "feature_names_in_"):
753
- missing_features = []
754
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
755
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
756
- missing_features.append(f)
757
-
758
- if len(missing_features) > 0:
759
- raise ValueError(
760
- "The feature names should match with those that were passed during fit.\n"
761
- f"Features seen during fit call but not present in the input: {missing_features}\n"
762
- f"Features in the input dataframe : {input_cols}\n"
763
- )
764
- input_df.columns = getattr(estimator, "feature_names_in_")
765
- else:
766
- # Just rename the column names to unquoted identifiers.
767
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
765
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
766
+ missing_features = []
767
+ features_in_dataset = set(dataset.columns)
768
+ columns_to_select = []
769
+ for i, f in enumerate(features_required_by_estimator):
770
+ if (
771
+ i >= len(input_cols)
772
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
773
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
774
+ and quoted_input_cols[i] not in features_in_dataset)
775
+ ):
776
+ missing_features.append(f)
777
+ elif input_cols[i] in features_in_dataset:
778
+ columns_to_select.append(input_cols[i])
779
+ elif unquoted_input_cols[i] in features_in_dataset:
780
+ columns_to_select.append(unquoted_input_cols[i])
781
+ else:
782
+ columns_to_select.append(quoted_input_cols[i])
783
+
784
+ if len(missing_features) > 0:
785
+ raise ValueError(
786
+ "The feature names should match with those that were passed during fit.\n"
787
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
788
+ f"Features in the input dataframe : {input_cols}\n"
789
+ )
790
+ input_df = dataset[columns_to_select]
791
+ input_df.columns = features_required_by_estimator
768
792
 
769
793
  transformed_numpy_array = getattr(estimator, inference_method)(
770
794
  input_df
@@ -843,11 +867,18 @@ class TSNE(BaseTransformer):
843
867
  Transformed dataset.
844
868
  """
845
869
  if isinstance(dataset, DataFrame):
870
+ expected_type_inferred = ""
871
+ # when it is classifier, infer the datatype from label columns
872
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
873
+ expected_type_inferred = convert_sp_to_sf_type(
874
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
875
+ )
876
+
846
877
  output_df = self._batch_inference(
847
878
  dataset=dataset,
848
879
  inference_method="predict",
849
880
  expected_output_cols_list=self.output_cols,
850
- expected_output_cols_type="",
881
+ expected_output_cols_type=expected_type_inferred,
851
882
  )
852
883
  elif isinstance(dataset, pd.DataFrame):
853
884
  output_df = self._sklearn_inference(
@@ -918,10 +949,10 @@ class TSNE(BaseTransformer):
918
949
 
919
950
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
920
951
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
921
- Returns an empty list if current object is not a classifier or not yet fitted.
952
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
922
953
  """
923
954
  if getattr(self._sklearn_object, "classes_", None) is None:
924
- return []
955
+ return [output_cols_prefix]
925
956
 
926
957
  classes = self._sklearn_object.classes_
927
958
  if isinstance(classes, numpy.ndarray):
@@ -1146,7 +1177,7 @@ class TSNE(BaseTransformer):
1146
1177
  cp.dump(self._sklearn_object, local_score_file)
1147
1178
 
1148
1179
  # Create temp stage to run score.
1149
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1180
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1150
1181
  session = dataset._session
1151
1182
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1152
1183
  SqlResultValidator(
@@ -1160,8 +1191,9 @@ class TSNE(BaseTransformer):
1160
1191
  expected_value=f"Stage area {score_stage_name} successfully created."
1161
1192
  ).validate()
1162
1193
 
1163
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1164
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1194
+ # Use posixpath to construct stage paths
1195
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1196
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1165
1197
  statement_params = telemetry.get_function_usage_statement_params(
1166
1198
  project=_PROJECT,
1167
1199
  subproject=_SUBPROJECT,
@@ -1187,6 +1219,7 @@ class TSNE(BaseTransformer):
1187
1219
  replace=True,
1188
1220
  session=session,
1189
1221
  statement_params=statement_params,
1222
+ anonymous=True
1190
1223
  )
1191
1224
  def score_wrapper_sproc(
1192
1225
  session: Session,
@@ -1194,7 +1227,8 @@ class TSNE(BaseTransformer):
1194
1227
  stage_score_file_name: str,
1195
1228
  input_cols: List[str],
1196
1229
  label_cols: List[str],
1197
- sample_weight_col: Optional[str]
1230
+ sample_weight_col: Optional[str],
1231
+ statement_params: Dict[str, str]
1198
1232
  ) -> float:
1199
1233
  import cloudpickle as cp
1200
1234
  import numpy as np
@@ -1244,14 +1278,14 @@ class TSNE(BaseTransformer):
1244
1278
  api_calls=[Session.call],
1245
1279
  custom_tags=dict([("autogen", True)]),
1246
1280
  )
1247
- score = session.call(
1248
- score_sproc_name,
1281
+ score = score_wrapper_sproc(
1282
+ session,
1249
1283
  query,
1250
1284
  stage_score_file_name,
1251
1285
  identifier.get_unescaped_names(self.input_cols),
1252
1286
  identifier.get_unescaped_names(self.label_cols),
1253
1287
  identifier.get_unescaped_names(self.sample_weight_col),
1254
- statement_params=statement_params,
1288
+ statement_params,
1255
1289
  )
1256
1290
 
1257
1291
  cleanup_temp_files([local_score_file_name])
@@ -1269,18 +1303,20 @@ class TSNE(BaseTransformer):
1269
1303
  if self._sklearn_object._estimator_type == 'classifier':
1270
1304
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1271
1305
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1272
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1306
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1307
+ ([] if self._drop_input_cols else inputs) + outputs)
1273
1308
  # For regressor, the type of predict is float64
1274
1309
  elif self._sklearn_object._estimator_type == 'regressor':
1275
1310
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1276
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1277
-
1311
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1312
+ ([] if self._drop_input_cols else inputs) + outputs)
1278
1313
  for prob_func in PROB_FUNCTIONS:
1279
1314
  if hasattr(self, prob_func):
1280
1315
  output_cols_prefix: str = f"{prob_func}_"
1281
1316
  output_column_names = self._get_output_column_names(output_cols_prefix)
1282
1317
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1283
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1318
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1319
+ ([] if self._drop_input_cols else inputs) + outputs)
1284
1320
 
1285
1321
  @property
1286
1322
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -54,7 +54,12 @@ def accuracy_score(
54
54
  metrics_utils.check_label_columns(y_true_col_names, y_pred_col_names)
55
55
 
56
56
  if isinstance(y_true_col_names, str) or (len(y_true_col_names) == 1):
57
- score_column = F.iff(df[y_true_col_names] == df[y_pred_col_names], 1, 0) # type: ignore[arg-type]
57
+ y_true, y_pred = (
58
+ (y_true_col_names, y_pred_col_names)
59
+ if isinstance(y_true_col_names, str)
60
+ else (y_true_col_names[0], y_pred_col_names[0])
61
+ )
62
+ score_column = F.iff(df[y_true] == df[y_pred], 1, 0) # type: ignore[arg-type]
58
63
  # multilabel
59
64
  else:
60
65
  expr = " and ".join([f"({y_true_col_names[i]} = {y_pred_col_names[i]})" for i in range(len(y_true_col_names))])