snowflake-ml-python 1.6.1__py3-none-any.whl → 1.6.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (284) hide show
  1. snowflake/cortex/__init__.py +4 -0
  2. snowflake/cortex/_classify_text.py +2 -2
  3. snowflake/cortex/_embed_text_1024.py +37 -0
  4. snowflake/cortex/_embed_text_768.py +37 -0
  5. snowflake/cortex/_extract_answer.py +2 -2
  6. snowflake/cortex/_sentiment.py +2 -2
  7. snowflake/cortex/_summarize.py +2 -2
  8. snowflake/cortex/_translate.py +2 -2
  9. snowflake/cortex/_util.py +4 -4
  10. snowflake/ml/_internal/env_utils.py +5 -5
  11. snowflake/ml/_internal/exceptions/error_codes.py +2 -0
  12. snowflake/ml/_internal/telemetry.py +142 -20
  13. snowflake/ml/_internal/utils/db_utils.py +50 -0
  14. snowflake/ml/_internal/utils/identifier.py +48 -11
  15. snowflake/ml/_internal/utils/service_logger.py +63 -0
  16. snowflake/ml/_internal/utils/snowflake_env.py +23 -13
  17. snowflake/ml/_internal/utils/sql_identifier.py +26 -2
  18. snowflake/ml/_internal/utils/table_manager.py +19 -1
  19. snowflake/ml/data/_internal/arrow_ingestor.py +1 -11
  20. snowflake/ml/data/data_connector.py +33 -7
  21. snowflake/ml/data/ingestor_utils.py +20 -10
  22. snowflake/ml/data/torch_utils.py +68 -0
  23. snowflake/ml/dataset/dataset.py +1 -3
  24. snowflake/ml/feature_store/access_manager.py +3 -3
  25. snowflake/ml/feature_store/feature_store.py +60 -19
  26. snowflake/ml/feature_store/feature_view.py +84 -30
  27. snowflake/ml/fileset/embedded_stage_fs.py +1 -1
  28. snowflake/ml/fileset/fileset.py +1 -1
  29. snowflake/ml/fileset/sfcfs.py +9 -3
  30. snowflake/ml/fileset/stage_fs.py +2 -1
  31. snowflake/ml/lineage/lineage_node.py +7 -2
  32. snowflake/ml/model/__init__.py +1 -2
  33. snowflake/ml/model/_client/model/model_version_impl.py +96 -12
  34. snowflake/ml/model/_client/ops/model_ops.py +124 -6
  35. snowflake/ml/model/_client/ops/service_ops.py +309 -9
  36. snowflake/ml/model/_client/service/model_deployment_spec.py +8 -5
  37. snowflake/ml/model/_client/service/model_deployment_spec_schema.py +2 -2
  38. snowflake/ml/model/_client/sql/_base.py +5 -0
  39. snowflake/ml/model/_client/sql/model.py +1 -0
  40. snowflake/ml/model/_client/sql/model_version.py +9 -5
  41. snowflake/ml/model/_client/sql/service.py +121 -20
  42. snowflake/ml/model/_model_composer/model_composer.py +11 -39
  43. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +31 -11
  44. snowflake/ml/model/_packager/model_env/model_env.py +4 -38
  45. snowflake/ml/model/_packager/model_handlers/_utils.py +134 -28
  46. snowflake/ml/model/_packager/model_handlers/catboost.py +31 -30
  47. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +26 -18
  48. snowflake/ml/model/_packager/model_handlers/lightgbm.py +31 -58
  49. snowflake/ml/model/_packager/model_handlers/mlflow.py +3 -5
  50. snowflake/ml/model/_packager/model_handlers/model_objective_utils.py +169 -0
  51. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +15 -8
  52. snowflake/ml/model/_packager/model_handlers/sklearn.py +56 -60
  53. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +141 -9
  54. snowflake/ml/model/_packager/model_handlers/torchscript.py +2 -2
  55. snowflake/ml/model/_packager/model_handlers/xgboost.py +63 -48
  56. snowflake/ml/model/_packager/model_meta/model_meta.py +16 -42
  57. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +1 -14
  58. snowflake/ml/model/_packager/model_packager.py +14 -8
  59. snowflake/ml/model/_packager/model_runtime/model_runtime.py +11 -0
  60. snowflake/ml/model/_signatures/pytorch_handler.py +1 -1
  61. snowflake/ml/model/_signatures/snowpark_handler.py +3 -2
  62. snowflake/ml/model/_signatures/utils.py +9 -0
  63. snowflake/ml/model/type_hints.py +12 -145
  64. snowflake/ml/modeling/_internal/constants.py +1 -0
  65. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +5 -5
  66. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +9 -6
  67. snowflake/ml/modeling/_internal/model_specifications.py +2 -0
  68. snowflake/ml/modeling/_internal/model_trainer.py +1 -0
  69. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +2 -4
  70. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +5 -5
  71. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +130 -166
  72. snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +0 -1
  73. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +61 -21
  74. snowflake/ml/modeling/cluster/affinity_propagation.py +61 -21
  75. snowflake/ml/modeling/cluster/agglomerative_clustering.py +61 -21
  76. snowflake/ml/modeling/cluster/birch.py +61 -21
  77. snowflake/ml/modeling/cluster/bisecting_k_means.py +61 -21
  78. snowflake/ml/modeling/cluster/dbscan.py +61 -21
  79. snowflake/ml/modeling/cluster/feature_agglomeration.py +61 -21
  80. snowflake/ml/modeling/cluster/k_means.py +61 -21
  81. snowflake/ml/modeling/cluster/mean_shift.py +61 -21
  82. snowflake/ml/modeling/cluster/mini_batch_k_means.py +61 -21
  83. snowflake/ml/modeling/cluster/optics.py +61 -21
  84. snowflake/ml/modeling/cluster/spectral_biclustering.py +61 -21
  85. snowflake/ml/modeling/cluster/spectral_clustering.py +61 -21
  86. snowflake/ml/modeling/cluster/spectral_coclustering.py +61 -21
  87. snowflake/ml/modeling/compose/column_transformer.py +61 -21
  88. snowflake/ml/modeling/compose/transformed_target_regressor.py +61 -21
  89. snowflake/ml/modeling/covariance/elliptic_envelope.py +61 -21
  90. snowflake/ml/modeling/covariance/empirical_covariance.py +61 -21
  91. snowflake/ml/modeling/covariance/graphical_lasso.py +61 -21
  92. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +61 -21
  93. snowflake/ml/modeling/covariance/ledoit_wolf.py +61 -21
  94. snowflake/ml/modeling/covariance/min_cov_det.py +61 -21
  95. snowflake/ml/modeling/covariance/oas.py +61 -21
  96. snowflake/ml/modeling/covariance/shrunk_covariance.py +61 -21
  97. snowflake/ml/modeling/decomposition/dictionary_learning.py +61 -21
  98. snowflake/ml/modeling/decomposition/factor_analysis.py +61 -21
  99. snowflake/ml/modeling/decomposition/fast_ica.py +61 -21
  100. snowflake/ml/modeling/decomposition/incremental_pca.py +61 -21
  101. snowflake/ml/modeling/decomposition/kernel_pca.py +61 -21
  102. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +61 -21
  103. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +61 -21
  104. snowflake/ml/modeling/decomposition/pca.py +61 -21
  105. snowflake/ml/modeling/decomposition/sparse_pca.py +61 -21
  106. snowflake/ml/modeling/decomposition/truncated_svd.py +61 -21
  107. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +61 -21
  108. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +61 -21
  109. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +61 -21
  110. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +61 -21
  111. snowflake/ml/modeling/ensemble/bagging_classifier.py +61 -21
  112. snowflake/ml/modeling/ensemble/bagging_regressor.py +61 -21
  113. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +61 -21
  114. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +61 -21
  115. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +61 -21
  116. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +61 -21
  117. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +61 -21
  118. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +61 -21
  119. snowflake/ml/modeling/ensemble/isolation_forest.py +61 -21
  120. snowflake/ml/modeling/ensemble/random_forest_classifier.py +61 -21
  121. snowflake/ml/modeling/ensemble/random_forest_regressor.py +61 -21
  122. snowflake/ml/modeling/ensemble/stacking_regressor.py +61 -21
  123. snowflake/ml/modeling/ensemble/voting_classifier.py +61 -21
  124. snowflake/ml/modeling/ensemble/voting_regressor.py +61 -21
  125. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +61 -21
  126. snowflake/ml/modeling/feature_selection/select_fdr.py +61 -21
  127. snowflake/ml/modeling/feature_selection/select_fpr.py +61 -21
  128. snowflake/ml/modeling/feature_selection/select_fwe.py +61 -21
  129. snowflake/ml/modeling/feature_selection/select_k_best.py +61 -21
  130. snowflake/ml/modeling/feature_selection/select_percentile.py +61 -21
  131. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +61 -21
  132. snowflake/ml/modeling/feature_selection/variance_threshold.py +61 -21
  133. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +61 -21
  134. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +61 -21
  135. snowflake/ml/modeling/impute/iterative_imputer.py +61 -21
  136. snowflake/ml/modeling/impute/knn_imputer.py +61 -21
  137. snowflake/ml/modeling/impute/missing_indicator.py +61 -21
  138. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +61 -21
  139. snowflake/ml/modeling/kernel_approximation/nystroem.py +61 -21
  140. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +61 -21
  141. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +61 -21
  142. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +61 -21
  143. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +61 -21
  144. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +61 -21
  145. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +61 -21
  146. snowflake/ml/modeling/linear_model/ard_regression.py +61 -21
  147. snowflake/ml/modeling/linear_model/bayesian_ridge.py +61 -21
  148. snowflake/ml/modeling/linear_model/elastic_net.py +61 -21
  149. snowflake/ml/modeling/linear_model/elastic_net_cv.py +61 -21
  150. snowflake/ml/modeling/linear_model/gamma_regressor.py +61 -21
  151. snowflake/ml/modeling/linear_model/huber_regressor.py +61 -21
  152. snowflake/ml/modeling/linear_model/lars.py +61 -21
  153. snowflake/ml/modeling/linear_model/lars_cv.py +61 -21
  154. snowflake/ml/modeling/linear_model/lasso.py +61 -21
  155. snowflake/ml/modeling/linear_model/lasso_cv.py +61 -21
  156. snowflake/ml/modeling/linear_model/lasso_lars.py +61 -21
  157. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +61 -21
  158. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +61 -21
  159. snowflake/ml/modeling/linear_model/linear_regression.py +61 -21
  160. snowflake/ml/modeling/linear_model/logistic_regression.py +61 -21
  161. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +61 -21
  162. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +61 -21
  163. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +61 -21
  164. snowflake/ml/modeling/linear_model/multi_task_lasso.py +61 -21
  165. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +61 -21
  166. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +61 -21
  167. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +61 -21
  168. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +61 -21
  169. snowflake/ml/modeling/linear_model/perceptron.py +61 -21
  170. snowflake/ml/modeling/linear_model/poisson_regressor.py +61 -21
  171. snowflake/ml/modeling/linear_model/ransac_regressor.py +61 -21
  172. snowflake/ml/modeling/linear_model/ridge.py +61 -21
  173. snowflake/ml/modeling/linear_model/ridge_classifier.py +61 -21
  174. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +61 -21
  175. snowflake/ml/modeling/linear_model/ridge_cv.py +61 -21
  176. snowflake/ml/modeling/linear_model/sgd_classifier.py +61 -21
  177. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +61 -21
  178. snowflake/ml/modeling/linear_model/sgd_regressor.py +61 -21
  179. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +61 -21
  180. snowflake/ml/modeling/linear_model/tweedie_regressor.py +61 -21
  181. snowflake/ml/modeling/manifold/isomap.py +61 -21
  182. snowflake/ml/modeling/manifold/mds.py +61 -21
  183. snowflake/ml/modeling/manifold/spectral_embedding.py +61 -21
  184. snowflake/ml/modeling/manifold/tsne.py +61 -21
  185. snowflake/ml/modeling/metrics/metrics_utils.py +2 -2
  186. snowflake/ml/modeling/metrics/ranking.py +0 -3
  187. snowflake/ml/modeling/metrics/regression.py +0 -3
  188. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +61 -21
  189. snowflake/ml/modeling/mixture/gaussian_mixture.py +61 -21
  190. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +61 -21
  191. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +61 -21
  192. snowflake/ml/modeling/multiclass/output_code_classifier.py +61 -21
  193. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +61 -21
  194. snowflake/ml/modeling/naive_bayes/categorical_nb.py +61 -21
  195. snowflake/ml/modeling/naive_bayes/complement_nb.py +61 -21
  196. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +61 -21
  197. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +61 -21
  198. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +61 -21
  199. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +61 -21
  200. snowflake/ml/modeling/neighbors/kernel_density.py +61 -21
  201. snowflake/ml/modeling/neighbors/local_outlier_factor.py +61 -21
  202. snowflake/ml/modeling/neighbors/nearest_centroid.py +61 -21
  203. snowflake/ml/modeling/neighbors/nearest_neighbors.py +61 -21
  204. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +61 -21
  205. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +61 -21
  206. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +61 -21
  207. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +61 -21
  208. snowflake/ml/modeling/neural_network/mlp_classifier.py +61 -21
  209. snowflake/ml/modeling/neural_network/mlp_regressor.py +61 -21
  210. snowflake/ml/modeling/parameters/disable_model_tracer.py +5 -0
  211. snowflake/ml/modeling/pipeline/pipeline.py +1 -13
  212. snowflake/ml/modeling/preprocessing/polynomial_features.py +61 -21
  213. snowflake/ml/modeling/semi_supervised/label_propagation.py +61 -21
  214. snowflake/ml/modeling/semi_supervised/label_spreading.py +61 -21
  215. snowflake/ml/modeling/svm/linear_svc.py +61 -21
  216. snowflake/ml/modeling/svm/linear_svr.py +61 -21
  217. snowflake/ml/modeling/svm/nu_svc.py +61 -21
  218. snowflake/ml/modeling/svm/nu_svr.py +61 -21
  219. snowflake/ml/modeling/svm/svc.py +61 -21
  220. snowflake/ml/modeling/svm/svr.py +61 -21
  221. snowflake/ml/modeling/tree/decision_tree_classifier.py +61 -21
  222. snowflake/ml/modeling/tree/decision_tree_regressor.py +61 -21
  223. snowflake/ml/modeling/tree/extra_tree_classifier.py +61 -21
  224. snowflake/ml/modeling/tree/extra_tree_regressor.py +61 -21
  225. snowflake/ml/modeling/xgboost/xgb_classifier.py +64 -23
  226. snowflake/ml/modeling/xgboost/xgb_regressor.py +64 -23
  227. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +64 -23
  228. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +64 -23
  229. snowflake/ml/monitoring/_client/model_monitor.py +126 -0
  230. snowflake/ml/monitoring/_client/model_monitor_manager.py +361 -0
  231. snowflake/ml/monitoring/_client/model_monitor_version.py +1 -0
  232. snowflake/ml/monitoring/_client/monitor_sql_client.py +1335 -0
  233. snowflake/ml/monitoring/_client/queries/record_count.ssql +14 -0
  234. snowflake/ml/monitoring/_client/queries/rmse.ssql +28 -0
  235. snowflake/ml/monitoring/entities/model_monitor_config.py +28 -0
  236. snowflake/ml/monitoring/entities/model_monitor_interval.py +46 -0
  237. snowflake/ml/monitoring/entities/output_score_type.py +90 -0
  238. snowflake/ml/registry/_manager/model_manager.py +4 -0
  239. snowflake/ml/registry/registry.py +166 -8
  240. snowflake/ml/version.py +1 -1
  241. {snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.3.dist-info}/METADATA +43 -9
  242. snowflake_ml_python-1.6.3.dist-info/RECORD +400 -0
  243. {snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.3.dist-info}/WHEEL +1 -1
  244. snowflake/ml/_internal/container_services/image_registry/credential.py +0 -84
  245. snowflake/ml/_internal/container_services/image_registry/http_client.py +0 -127
  246. snowflake/ml/_internal/container_services/image_registry/imagelib.py +0 -400
  247. snowflake/ml/_internal/container_services/image_registry/registry_client.py +0 -212
  248. snowflake/ml/_internal/utils/log_stream_processor.py +0 -30
  249. snowflake/ml/_internal/utils/session_token_manager.py +0 -46
  250. snowflake/ml/_internal/utils/spcs_attribution_utils.py +0 -122
  251. snowflake/ml/_internal/utils/uri.py +0 -77
  252. snowflake/ml/data/torch_dataset.py +0 -33
  253. snowflake/ml/model/_api.py +0 -568
  254. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +0 -12
  255. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +0 -249
  256. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +0 -130
  257. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +0 -36
  258. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +0 -268
  259. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +0 -215
  260. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +0 -53
  261. snowflake/ml/model/_deploy_client/image_builds/templates/image_build_job_spec_template +0 -38
  262. snowflake/ml/model/_deploy_client/image_builds/templates/kaniko_shell_script_template +0 -105
  263. snowflake/ml/model/_deploy_client/snowservice/deploy.py +0 -611
  264. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +0 -116
  265. snowflake/ml/model/_deploy_client/snowservice/instance_types.py +0 -10
  266. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +0 -28
  267. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template_with_model +0 -21
  268. snowflake/ml/model/_deploy_client/utils/constants.py +0 -48
  269. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +0 -280
  270. snowflake/ml/model/_deploy_client/warehouse/deploy.py +0 -202
  271. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +0 -99
  272. snowflake/ml/model/_packager/model_handlers/llm.py +0 -267
  273. snowflake/ml/model/_packager/model_meta/_core_requirements.py +0 -11
  274. snowflake/ml/model/deploy_platforms.py +0 -6
  275. snowflake/ml/model/models/llm.py +0 -104
  276. snowflake/ml/monitoring/monitor.py +0 -203
  277. snowflake/ml/registry/_initial_schema.py +0 -142
  278. snowflake/ml/registry/_schema.py +0 -82
  279. snowflake/ml/registry/_schema_upgrade_plans.py +0 -116
  280. snowflake/ml/registry/_schema_version_manager.py +0 -163
  281. snowflake/ml/registry/model_registry.py +0 -2048
  282. snowflake_ml_python-1.6.1.dist-info/RECORD +0 -422
  283. {snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.3.dist-info}/LICENSE.txt +0 -0
  284. {snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.3.dist-info}/top_level.txt +0 -0
@@ -4,14 +4,12 @@
4
4
  #
5
5
  import inspect
6
6
  import os
7
- import posixpath
8
- from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
9
- from typing_extensions import TypeGuard
7
+ from typing import Iterable, Optional, Union, List, Any, Dict, Set, Tuple
10
8
  from uuid import uuid4
11
9
 
12
10
  import cloudpickle as cp
13
- import pandas as pd
14
11
  import numpy as np
12
+ import pandas as pd
15
13
  from numpy import typing as npt
16
14
 
17
15
 
@@ -24,12 +22,11 @@ from snowflake.ml.modeling.framework.base import BaseTransformer, _process_cols
24
22
  from snowflake.ml._internal import telemetry
25
23
  from snowflake.ml._internal.exceptions import error_codes, exceptions, modeling_error_messages
26
24
  from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
27
- from snowflake.ml._internal.utils import pkg_version_utils, identifier
25
+ from snowflake.ml._internal.utils import identifier
28
26
  from snowflake.snowpark import DataFrame, Session
29
27
  from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
28
  from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
31
29
  from snowflake.ml.modeling._internal.transformer_protocols import (
32
- ModelTransformHandlers,
33
30
  BatchInferenceKwargsTypedDict,
34
31
  ScoreKwargsTypedDict
35
32
  )
@@ -536,12 +533,23 @@ class NearestNeighbors(BaseTransformer):
536
533
  autogenerated=self._autogenerated,
537
534
  subproject=_SUBPROJECT,
538
535
  )
539
- output_result, fitted_estimator = model_trainer.train_fit_predict(
540
- drop_input_cols=self._drop_input_cols,
541
- expected_output_cols_list=(
542
- self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
543
- ),
536
+ expected_output_cols = (
537
+ self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
544
538
  )
539
+ if isinstance(dataset, DataFrame):
540
+ expected_output_cols, example_output_pd_df = self._align_expected_output(
541
+ "fit_predict", dataset, expected_output_cols, output_cols_prefix
542
+ )
543
+ output_result, fitted_estimator = model_trainer.train_fit_predict(
544
+ drop_input_cols=self._drop_input_cols,
545
+ expected_output_cols_list=expected_output_cols,
546
+ example_output_pd_df=example_output_pd_df,
547
+ )
548
+ else:
549
+ output_result, fitted_estimator = model_trainer.train_fit_predict(
550
+ drop_input_cols=self._drop_input_cols,
551
+ expected_output_cols_list=expected_output_cols,
552
+ )
545
553
  self._sklearn_object = fitted_estimator
546
554
  self._is_fitted = True
547
555
  return output_result
@@ -564,6 +572,7 @@ class NearestNeighbors(BaseTransformer):
564
572
  """
565
573
  self._infer_input_output_cols(dataset)
566
574
  super()._check_dataset_type(dataset)
575
+
567
576
  model_trainer = ModelTrainerBuilder.build_fit_transform(
568
577
  estimator=self._sklearn_object,
569
578
  dataset=dataset,
@@ -620,12 +629,41 @@ class NearestNeighbors(BaseTransformer):
620
629
 
621
630
  return rv
622
631
 
623
- def _align_expected_output_names(
624
- self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
625
- ) -> List[str]:
632
+ def _align_expected_output(
633
+ self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str,
634
+ ) -> Tuple[List[str], pd.DataFrame]:
635
+ """ Run 1 line of data with the desired method, and return one tuple that consists of the output column names
636
+ and output dataframe with 1 line.
637
+ If the method is fit_predict, run 2 lines of data.
638
+ """
626
639
  # in case the inferred output column names dimension is different
627
640
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
628
- sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
641
+
642
+ # For fit_predict method, a minimum of 2 is required by MinCovDet, BayesianGaussianMixture
643
+ # so change the minimum of number of rows to 2
644
+ num_examples = 2
645
+ statement_params = telemetry.get_function_usage_statement_params(
646
+ project=_PROJECT,
647
+ subproject=_SUBPROJECT,
648
+ function_name=telemetry.get_statement_params_full_func_name(
649
+ inspect.currentframe(), NearestNeighbors.__class__.__name__
650
+ ),
651
+ api_calls=[Session.call],
652
+ custom_tags={"autogen": True} if self._autogenerated else None,
653
+ )
654
+ if output_cols_prefix == "fit_predict_":
655
+ if hasattr(self._sklearn_object, "n_clusters"):
656
+ # cluster classes such as BisectingKMeansTest requires # of examples >= n_clusters
657
+ num_examples = self._sklearn_object.n_clusters
658
+ elif hasattr(self._sklearn_object, "min_samples"):
659
+ # OPTICS default min_samples 5, which requires at least 5 lines of data
660
+ num_examples = self._sklearn_object.min_samples
661
+ elif hasattr(self._sklearn_object, "n_neighbors") and hasattr(self._sklearn_object, "n_samples"):
662
+ # LocalOutlierFactor expects n_neighbors <= n_samples
663
+ num_examples = self._sklearn_object.n_neighbors
664
+ sample_pd_df = dataset.select(self.input_cols).limit(num_examples).to_pandas(statement_params=statement_params)
665
+ else:
666
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas(statement_params=statement_params)
629
667
 
630
668
  # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
631
669
  # seen during the fit.
@@ -637,12 +675,14 @@ class NearestNeighbors(BaseTransformer):
637
675
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
638
676
  if self.sample_weight_col:
639
677
  output_df_columns_set -= set(self.sample_weight_col)
678
+
640
679
  # if the dimension of inferred output column names is correct; use it
641
680
  if len(expected_output_cols_list) == len(output_df_columns_set):
642
- return expected_output_cols_list
681
+ return expected_output_cols_list, output_df_pd
643
682
  # otherwise, use the sklearn estimator's output
644
683
  else:
645
- return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
684
+ expected_output_cols_list = sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
685
+ return expected_output_cols_list, output_df_pd[expected_output_cols_list]
646
686
 
647
687
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
648
688
  @telemetry.send_api_usage_telemetry(
@@ -688,7 +728,7 @@ class NearestNeighbors(BaseTransformer):
688
728
  drop_input_cols=self._drop_input_cols,
689
729
  expected_output_cols_type="float",
690
730
  )
691
- expected_output_cols = self._align_expected_output_names(
731
+ expected_output_cols, _ = self._align_expected_output(
692
732
  inference_method, dataset, expected_output_cols, output_cols_prefix
693
733
  )
694
734
 
@@ -754,7 +794,7 @@ class NearestNeighbors(BaseTransformer):
754
794
  drop_input_cols=self._drop_input_cols,
755
795
  expected_output_cols_type="float",
756
796
  )
757
- expected_output_cols = self._align_expected_output_names(
797
+ expected_output_cols, _ = self._align_expected_output(
758
798
  inference_method, dataset, expected_output_cols, output_cols_prefix
759
799
  )
760
800
  elif isinstance(dataset, pd.DataFrame):
@@ -817,7 +857,7 @@ class NearestNeighbors(BaseTransformer):
817
857
  drop_input_cols=self._drop_input_cols,
818
858
  expected_output_cols_type="float",
819
859
  )
820
- expected_output_cols = self._align_expected_output_names(
860
+ expected_output_cols, _ = self._align_expected_output(
821
861
  inference_method, dataset, expected_output_cols, output_cols_prefix
822
862
  )
823
863
 
@@ -882,7 +922,7 @@ class NearestNeighbors(BaseTransformer):
882
922
  drop_input_cols = self._drop_input_cols,
883
923
  expected_output_cols_type="float",
884
924
  )
885
- expected_output_cols = self._align_expected_output_names(
925
+ expected_output_cols, _ = self._align_expected_output(
886
926
  inference_method, dataset, expected_output_cols, output_cols_prefix
887
927
  )
888
928
 
@@ -4,14 +4,12 @@
4
4
  #
5
5
  import inspect
6
6
  import os
7
- import posixpath
8
- from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
9
- from typing_extensions import TypeGuard
7
+ from typing import Iterable, Optional, Union, List, Any, Dict, Set, Tuple
10
8
  from uuid import uuid4
11
9
 
12
10
  import cloudpickle as cp
13
- import pandas as pd
14
11
  import numpy as np
12
+ import pandas as pd
15
13
  from numpy import typing as npt
16
14
 
17
15
 
@@ -24,12 +22,11 @@ from snowflake.ml.modeling.framework.base import BaseTransformer, _process_cols
24
22
  from snowflake.ml._internal import telemetry
25
23
  from snowflake.ml._internal.exceptions import error_codes, exceptions, modeling_error_messages
26
24
  from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
27
- from snowflake.ml._internal.utils import pkg_version_utils, identifier
25
+ from snowflake.ml._internal.utils import identifier
28
26
  from snowflake.snowpark import DataFrame, Session
29
27
  from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
28
  from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
31
29
  from snowflake.ml.modeling._internal.transformer_protocols import (
32
- ModelTransformHandlers,
33
30
  BatchInferenceKwargsTypedDict,
34
31
  ScoreKwargsTypedDict
35
32
  )
@@ -557,12 +554,23 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
557
554
  autogenerated=self._autogenerated,
558
555
  subproject=_SUBPROJECT,
559
556
  )
560
- output_result, fitted_estimator = model_trainer.train_fit_predict(
561
- drop_input_cols=self._drop_input_cols,
562
- expected_output_cols_list=(
563
- self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
564
- ),
557
+ expected_output_cols = (
558
+ self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
565
559
  )
560
+ if isinstance(dataset, DataFrame):
561
+ expected_output_cols, example_output_pd_df = self._align_expected_output(
562
+ "fit_predict", dataset, expected_output_cols, output_cols_prefix
563
+ )
564
+ output_result, fitted_estimator = model_trainer.train_fit_predict(
565
+ drop_input_cols=self._drop_input_cols,
566
+ expected_output_cols_list=expected_output_cols,
567
+ example_output_pd_df=example_output_pd_df,
568
+ )
569
+ else:
570
+ output_result, fitted_estimator = model_trainer.train_fit_predict(
571
+ drop_input_cols=self._drop_input_cols,
572
+ expected_output_cols_list=expected_output_cols,
573
+ )
566
574
  self._sklearn_object = fitted_estimator
567
575
  self._is_fitted = True
568
576
  return output_result
@@ -587,6 +595,7 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
587
595
  """
588
596
  self._infer_input_output_cols(dataset)
589
597
  super()._check_dataset_type(dataset)
598
+
590
599
  model_trainer = ModelTrainerBuilder.build_fit_transform(
591
600
  estimator=self._sklearn_object,
592
601
  dataset=dataset,
@@ -643,12 +652,41 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
643
652
 
644
653
  return rv
645
654
 
646
- def _align_expected_output_names(
647
- self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
648
- ) -> List[str]:
655
+ def _align_expected_output(
656
+ self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str,
657
+ ) -> Tuple[List[str], pd.DataFrame]:
658
+ """ Run 1 line of data with the desired method, and return one tuple that consists of the output column names
659
+ and output dataframe with 1 line.
660
+ If the method is fit_predict, run 2 lines of data.
661
+ """
649
662
  # in case the inferred output column names dimension is different
650
663
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
651
- sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
664
+
665
+ # For fit_predict method, a minimum of 2 is required by MinCovDet, BayesianGaussianMixture
666
+ # so change the minimum of number of rows to 2
667
+ num_examples = 2
668
+ statement_params = telemetry.get_function_usage_statement_params(
669
+ project=_PROJECT,
670
+ subproject=_SUBPROJECT,
671
+ function_name=telemetry.get_statement_params_full_func_name(
672
+ inspect.currentframe(), NeighborhoodComponentsAnalysis.__class__.__name__
673
+ ),
674
+ api_calls=[Session.call],
675
+ custom_tags={"autogen": True} if self._autogenerated else None,
676
+ )
677
+ if output_cols_prefix == "fit_predict_":
678
+ if hasattr(self._sklearn_object, "n_clusters"):
679
+ # cluster classes such as BisectingKMeansTest requires # of examples >= n_clusters
680
+ num_examples = self._sklearn_object.n_clusters
681
+ elif hasattr(self._sklearn_object, "min_samples"):
682
+ # OPTICS default min_samples 5, which requires at least 5 lines of data
683
+ num_examples = self._sklearn_object.min_samples
684
+ elif hasattr(self._sklearn_object, "n_neighbors") and hasattr(self._sklearn_object, "n_samples"):
685
+ # LocalOutlierFactor expects n_neighbors <= n_samples
686
+ num_examples = self._sklearn_object.n_neighbors
687
+ sample_pd_df = dataset.select(self.input_cols).limit(num_examples).to_pandas(statement_params=statement_params)
688
+ else:
689
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas(statement_params=statement_params)
652
690
 
653
691
  # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
654
692
  # seen during the fit.
@@ -660,12 +698,14 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
660
698
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
661
699
  if self.sample_weight_col:
662
700
  output_df_columns_set -= set(self.sample_weight_col)
701
+
663
702
  # if the dimension of inferred output column names is correct; use it
664
703
  if len(expected_output_cols_list) == len(output_df_columns_set):
665
- return expected_output_cols_list
704
+ return expected_output_cols_list, output_df_pd
666
705
  # otherwise, use the sklearn estimator's output
667
706
  else:
668
- return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
707
+ expected_output_cols_list = sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
708
+ return expected_output_cols_list, output_df_pd[expected_output_cols_list]
669
709
 
670
710
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
671
711
  @telemetry.send_api_usage_telemetry(
@@ -711,7 +751,7 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
711
751
  drop_input_cols=self._drop_input_cols,
712
752
  expected_output_cols_type="float",
713
753
  )
714
- expected_output_cols = self._align_expected_output_names(
754
+ expected_output_cols, _ = self._align_expected_output(
715
755
  inference_method, dataset, expected_output_cols, output_cols_prefix
716
756
  )
717
757
 
@@ -777,7 +817,7 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
777
817
  drop_input_cols=self._drop_input_cols,
778
818
  expected_output_cols_type="float",
779
819
  )
780
- expected_output_cols = self._align_expected_output_names(
820
+ expected_output_cols, _ = self._align_expected_output(
781
821
  inference_method, dataset, expected_output_cols, output_cols_prefix
782
822
  )
783
823
  elif isinstance(dataset, pd.DataFrame):
@@ -840,7 +880,7 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
840
880
  drop_input_cols=self._drop_input_cols,
841
881
  expected_output_cols_type="float",
842
882
  )
843
- expected_output_cols = self._align_expected_output_names(
883
+ expected_output_cols, _ = self._align_expected_output(
844
884
  inference_method, dataset, expected_output_cols, output_cols_prefix
845
885
  )
846
886
 
@@ -905,7 +945,7 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
905
945
  drop_input_cols = self._drop_input_cols,
906
946
  expected_output_cols_type="float",
907
947
  )
908
- expected_output_cols = self._align_expected_output_names(
948
+ expected_output_cols, _ = self._align_expected_output(
909
949
  inference_method, dataset, expected_output_cols, output_cols_prefix
910
950
  )
911
951
 
@@ -4,14 +4,12 @@
4
4
  #
5
5
  import inspect
6
6
  import os
7
- import posixpath
8
- from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
9
- from typing_extensions import TypeGuard
7
+ from typing import Iterable, Optional, Union, List, Any, Dict, Set, Tuple
10
8
  from uuid import uuid4
11
9
 
12
10
  import cloudpickle as cp
13
- import pandas as pd
14
11
  import numpy as np
12
+ import pandas as pd
15
13
  from numpy import typing as npt
16
14
 
17
15
 
@@ -24,12 +22,11 @@ from snowflake.ml.modeling.framework.base import BaseTransformer, _process_cols
24
22
  from snowflake.ml._internal import telemetry
25
23
  from snowflake.ml._internal.exceptions import error_codes, exceptions, modeling_error_messages
26
24
  from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
27
- from snowflake.ml._internal.utils import pkg_version_utils, identifier
25
+ from snowflake.ml._internal.utils import identifier
28
26
  from snowflake.snowpark import DataFrame, Session
29
27
  from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
28
  from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
31
29
  from snowflake.ml.modeling._internal.transformer_protocols import (
32
- ModelTransformHandlers,
33
30
  BatchInferenceKwargsTypedDict,
34
31
  ScoreKwargsTypedDict
35
32
  )
@@ -558,12 +555,23 @@ class RadiusNeighborsClassifier(BaseTransformer):
558
555
  autogenerated=self._autogenerated,
559
556
  subproject=_SUBPROJECT,
560
557
  )
561
- output_result, fitted_estimator = model_trainer.train_fit_predict(
562
- drop_input_cols=self._drop_input_cols,
563
- expected_output_cols_list=(
564
- self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
565
- ),
558
+ expected_output_cols = (
559
+ self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
566
560
  )
561
+ if isinstance(dataset, DataFrame):
562
+ expected_output_cols, example_output_pd_df = self._align_expected_output(
563
+ "fit_predict", dataset, expected_output_cols, output_cols_prefix
564
+ )
565
+ output_result, fitted_estimator = model_trainer.train_fit_predict(
566
+ drop_input_cols=self._drop_input_cols,
567
+ expected_output_cols_list=expected_output_cols,
568
+ example_output_pd_df=example_output_pd_df,
569
+ )
570
+ else:
571
+ output_result, fitted_estimator = model_trainer.train_fit_predict(
572
+ drop_input_cols=self._drop_input_cols,
573
+ expected_output_cols_list=expected_output_cols,
574
+ )
567
575
  self._sklearn_object = fitted_estimator
568
576
  self._is_fitted = True
569
577
  return output_result
@@ -586,6 +594,7 @@ class RadiusNeighborsClassifier(BaseTransformer):
586
594
  """
587
595
  self._infer_input_output_cols(dataset)
588
596
  super()._check_dataset_type(dataset)
597
+
589
598
  model_trainer = ModelTrainerBuilder.build_fit_transform(
590
599
  estimator=self._sklearn_object,
591
600
  dataset=dataset,
@@ -642,12 +651,41 @@ class RadiusNeighborsClassifier(BaseTransformer):
642
651
 
643
652
  return rv
644
653
 
645
- def _align_expected_output_names(
646
- self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
647
- ) -> List[str]:
654
+ def _align_expected_output(
655
+ self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str,
656
+ ) -> Tuple[List[str], pd.DataFrame]:
657
+ """ Run 1 line of data with the desired method, and return one tuple that consists of the output column names
658
+ and output dataframe with 1 line.
659
+ If the method is fit_predict, run 2 lines of data.
660
+ """
648
661
  # in case the inferred output column names dimension is different
649
662
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
650
- sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
663
+
664
+ # For fit_predict method, a minimum of 2 is required by MinCovDet, BayesianGaussianMixture
665
+ # so change the minimum of number of rows to 2
666
+ num_examples = 2
667
+ statement_params = telemetry.get_function_usage_statement_params(
668
+ project=_PROJECT,
669
+ subproject=_SUBPROJECT,
670
+ function_name=telemetry.get_statement_params_full_func_name(
671
+ inspect.currentframe(), RadiusNeighborsClassifier.__class__.__name__
672
+ ),
673
+ api_calls=[Session.call],
674
+ custom_tags={"autogen": True} if self._autogenerated else None,
675
+ )
676
+ if output_cols_prefix == "fit_predict_":
677
+ if hasattr(self._sklearn_object, "n_clusters"):
678
+ # cluster classes such as BisectingKMeansTest requires # of examples >= n_clusters
679
+ num_examples = self._sklearn_object.n_clusters
680
+ elif hasattr(self._sklearn_object, "min_samples"):
681
+ # OPTICS default min_samples 5, which requires at least 5 lines of data
682
+ num_examples = self._sklearn_object.min_samples
683
+ elif hasattr(self._sklearn_object, "n_neighbors") and hasattr(self._sklearn_object, "n_samples"):
684
+ # LocalOutlierFactor expects n_neighbors <= n_samples
685
+ num_examples = self._sklearn_object.n_neighbors
686
+ sample_pd_df = dataset.select(self.input_cols).limit(num_examples).to_pandas(statement_params=statement_params)
687
+ else:
688
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas(statement_params=statement_params)
651
689
 
652
690
  # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
653
691
  # seen during the fit.
@@ -659,12 +697,14 @@ class RadiusNeighborsClassifier(BaseTransformer):
659
697
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
660
698
  if self.sample_weight_col:
661
699
  output_df_columns_set -= set(self.sample_weight_col)
700
+
662
701
  # if the dimension of inferred output column names is correct; use it
663
702
  if len(expected_output_cols_list) == len(output_df_columns_set):
664
- return expected_output_cols_list
703
+ return expected_output_cols_list, output_df_pd
665
704
  # otherwise, use the sklearn estimator's output
666
705
  else:
667
- return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
706
+ expected_output_cols_list = sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
707
+ return expected_output_cols_list, output_df_pd[expected_output_cols_list]
668
708
 
669
709
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
670
710
  @telemetry.send_api_usage_telemetry(
@@ -712,7 +752,7 @@ class RadiusNeighborsClassifier(BaseTransformer):
712
752
  drop_input_cols=self._drop_input_cols,
713
753
  expected_output_cols_type="float",
714
754
  )
715
- expected_output_cols = self._align_expected_output_names(
755
+ expected_output_cols, _ = self._align_expected_output(
716
756
  inference_method, dataset, expected_output_cols, output_cols_prefix
717
757
  )
718
758
 
@@ -780,7 +820,7 @@ class RadiusNeighborsClassifier(BaseTransformer):
780
820
  drop_input_cols=self._drop_input_cols,
781
821
  expected_output_cols_type="float",
782
822
  )
783
- expected_output_cols = self._align_expected_output_names(
823
+ expected_output_cols, _ = self._align_expected_output(
784
824
  inference_method, dataset, expected_output_cols, output_cols_prefix
785
825
  )
786
826
  elif isinstance(dataset, pd.DataFrame):
@@ -843,7 +883,7 @@ class RadiusNeighborsClassifier(BaseTransformer):
843
883
  drop_input_cols=self._drop_input_cols,
844
884
  expected_output_cols_type="float",
845
885
  )
846
- expected_output_cols = self._align_expected_output_names(
886
+ expected_output_cols, _ = self._align_expected_output(
847
887
  inference_method, dataset, expected_output_cols, output_cols_prefix
848
888
  )
849
889
 
@@ -908,7 +948,7 @@ class RadiusNeighborsClassifier(BaseTransformer):
908
948
  drop_input_cols = self._drop_input_cols,
909
949
  expected_output_cols_type="float",
910
950
  )
911
- expected_output_cols = self._align_expected_output_names(
951
+ expected_output_cols, _ = self._align_expected_output(
912
952
  inference_method, dataset, expected_output_cols, output_cols_prefix
913
953
  )
914
954
 
@@ -4,14 +4,12 @@
4
4
  #
5
5
  import inspect
6
6
  import os
7
- import posixpath
8
- from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
9
- from typing_extensions import TypeGuard
7
+ from typing import Iterable, Optional, Union, List, Any, Dict, Set, Tuple
10
8
  from uuid import uuid4
11
9
 
12
10
  import cloudpickle as cp
13
- import pandas as pd
14
11
  import numpy as np
12
+ import pandas as pd
15
13
  from numpy import typing as npt
16
14
 
17
15
 
@@ -24,12 +22,11 @@ from snowflake.ml.modeling.framework.base import BaseTransformer, _process_cols
24
22
  from snowflake.ml._internal import telemetry
25
23
  from snowflake.ml._internal.exceptions import error_codes, exceptions, modeling_error_messages
26
24
  from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
27
- from snowflake.ml._internal.utils import pkg_version_utils, identifier
25
+ from snowflake.ml._internal.utils import identifier
28
26
  from snowflake.snowpark import DataFrame, Session
29
27
  from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
28
  from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
31
29
  from snowflake.ml.modeling._internal.transformer_protocols import (
32
- ModelTransformHandlers,
33
30
  BatchInferenceKwargsTypedDict,
34
31
  ScoreKwargsTypedDict
35
32
  )
@@ -548,12 +545,23 @@ class RadiusNeighborsRegressor(BaseTransformer):
548
545
  autogenerated=self._autogenerated,
549
546
  subproject=_SUBPROJECT,
550
547
  )
551
- output_result, fitted_estimator = model_trainer.train_fit_predict(
552
- drop_input_cols=self._drop_input_cols,
553
- expected_output_cols_list=(
554
- self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
555
- ),
548
+ expected_output_cols = (
549
+ self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
556
550
  )
551
+ if isinstance(dataset, DataFrame):
552
+ expected_output_cols, example_output_pd_df = self._align_expected_output(
553
+ "fit_predict", dataset, expected_output_cols, output_cols_prefix
554
+ )
555
+ output_result, fitted_estimator = model_trainer.train_fit_predict(
556
+ drop_input_cols=self._drop_input_cols,
557
+ expected_output_cols_list=expected_output_cols,
558
+ example_output_pd_df=example_output_pd_df,
559
+ )
560
+ else:
561
+ output_result, fitted_estimator = model_trainer.train_fit_predict(
562
+ drop_input_cols=self._drop_input_cols,
563
+ expected_output_cols_list=expected_output_cols,
564
+ )
557
565
  self._sklearn_object = fitted_estimator
558
566
  self._is_fitted = True
559
567
  return output_result
@@ -576,6 +584,7 @@ class RadiusNeighborsRegressor(BaseTransformer):
576
584
  """
577
585
  self._infer_input_output_cols(dataset)
578
586
  super()._check_dataset_type(dataset)
587
+
579
588
  model_trainer = ModelTrainerBuilder.build_fit_transform(
580
589
  estimator=self._sklearn_object,
581
590
  dataset=dataset,
@@ -632,12 +641,41 @@ class RadiusNeighborsRegressor(BaseTransformer):
632
641
 
633
642
  return rv
634
643
 
635
- def _align_expected_output_names(
636
- self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
637
- ) -> List[str]:
644
+ def _align_expected_output(
645
+ self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str,
646
+ ) -> Tuple[List[str], pd.DataFrame]:
647
+ """ Run 1 line of data with the desired method, and return one tuple that consists of the output column names
648
+ and output dataframe with 1 line.
649
+ If the method is fit_predict, run 2 lines of data.
650
+ """
638
651
  # in case the inferred output column names dimension is different
639
652
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
640
- sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
653
+
654
+ # For fit_predict method, a minimum of 2 is required by MinCovDet, BayesianGaussianMixture
655
+ # so change the minimum of number of rows to 2
656
+ num_examples = 2
657
+ statement_params = telemetry.get_function_usage_statement_params(
658
+ project=_PROJECT,
659
+ subproject=_SUBPROJECT,
660
+ function_name=telemetry.get_statement_params_full_func_name(
661
+ inspect.currentframe(), RadiusNeighborsRegressor.__class__.__name__
662
+ ),
663
+ api_calls=[Session.call],
664
+ custom_tags={"autogen": True} if self._autogenerated else None,
665
+ )
666
+ if output_cols_prefix == "fit_predict_":
667
+ if hasattr(self._sklearn_object, "n_clusters"):
668
+ # cluster classes such as BisectingKMeansTest requires # of examples >= n_clusters
669
+ num_examples = self._sklearn_object.n_clusters
670
+ elif hasattr(self._sklearn_object, "min_samples"):
671
+ # OPTICS default min_samples 5, which requires at least 5 lines of data
672
+ num_examples = self._sklearn_object.min_samples
673
+ elif hasattr(self._sklearn_object, "n_neighbors") and hasattr(self._sklearn_object, "n_samples"):
674
+ # LocalOutlierFactor expects n_neighbors <= n_samples
675
+ num_examples = self._sklearn_object.n_neighbors
676
+ sample_pd_df = dataset.select(self.input_cols).limit(num_examples).to_pandas(statement_params=statement_params)
677
+ else:
678
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas(statement_params=statement_params)
641
679
 
642
680
  # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
643
681
  # seen during the fit.
@@ -649,12 +687,14 @@ class RadiusNeighborsRegressor(BaseTransformer):
649
687
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
650
688
  if self.sample_weight_col:
651
689
  output_df_columns_set -= set(self.sample_weight_col)
690
+
652
691
  # if the dimension of inferred output column names is correct; use it
653
692
  if len(expected_output_cols_list) == len(output_df_columns_set):
654
- return expected_output_cols_list
693
+ return expected_output_cols_list, output_df_pd
655
694
  # otherwise, use the sklearn estimator's output
656
695
  else:
657
- return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
696
+ expected_output_cols_list = sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
697
+ return expected_output_cols_list, output_df_pd[expected_output_cols_list]
658
698
 
659
699
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
660
700
  @telemetry.send_api_usage_telemetry(
@@ -700,7 +740,7 @@ class RadiusNeighborsRegressor(BaseTransformer):
700
740
  drop_input_cols=self._drop_input_cols,
701
741
  expected_output_cols_type="float",
702
742
  )
703
- expected_output_cols = self._align_expected_output_names(
743
+ expected_output_cols, _ = self._align_expected_output(
704
744
  inference_method, dataset, expected_output_cols, output_cols_prefix
705
745
  )
706
746
 
@@ -766,7 +806,7 @@ class RadiusNeighborsRegressor(BaseTransformer):
766
806
  drop_input_cols=self._drop_input_cols,
767
807
  expected_output_cols_type="float",
768
808
  )
769
- expected_output_cols = self._align_expected_output_names(
809
+ expected_output_cols, _ = self._align_expected_output(
770
810
  inference_method, dataset, expected_output_cols, output_cols_prefix
771
811
  )
772
812
  elif isinstance(dataset, pd.DataFrame):
@@ -829,7 +869,7 @@ class RadiusNeighborsRegressor(BaseTransformer):
829
869
  drop_input_cols=self._drop_input_cols,
830
870
  expected_output_cols_type="float",
831
871
  )
832
- expected_output_cols = self._align_expected_output_names(
872
+ expected_output_cols, _ = self._align_expected_output(
833
873
  inference_method, dataset, expected_output_cols, output_cols_prefix
834
874
  )
835
875
 
@@ -894,7 +934,7 @@ class RadiusNeighborsRegressor(BaseTransformer):
894
934
  drop_input_cols = self._drop_input_cols,
895
935
  expected_output_cols_type="float",
896
936
  )
897
- expected_output_cols = self._align_expected_output_names(
937
+ expected_output_cols, _ = self._align_expected_output(
898
938
  inference_method, dataset, expected_output_cols, output_cols_prefix
899
939
  )
900
940