snowflake-ml-python 1.8.2__py3-none-any.whl → 1.8.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (322) hide show
  1. snowflake/cortex/__init__.py +7 -1
  2. snowflake/cortex/_classify_text.py +3 -3
  3. snowflake/cortex/_complete.py +23 -24
  4. snowflake/cortex/_embed_text_1024.py +4 -4
  5. snowflake/cortex/_embed_text_768.py +4 -4
  6. snowflake/cortex/_finetune.py +8 -8
  7. snowflake/cortex/_util.py +8 -12
  8. snowflake/ml/_internal/env.py +4 -3
  9. snowflake/ml/_internal/env_utils.py +63 -34
  10. snowflake/ml/_internal/file_utils.py +10 -21
  11. snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +5 -7
  12. snowflake/ml/_internal/init_utils.py +2 -3
  13. snowflake/ml/_internal/lineage/lineage_utils.py +6 -6
  14. snowflake/ml/_internal/platform_capabilities.py +18 -16
  15. snowflake/ml/_internal/telemetry.py +39 -52
  16. snowflake/ml/_internal/type_utils.py +3 -3
  17. snowflake/ml/_internal/utils/db_utils.py +2 -2
  18. snowflake/ml/_internal/utils/identifier.py +10 -10
  19. snowflake/ml/_internal/utils/import_utils.py +2 -2
  20. snowflake/ml/_internal/utils/parallelize.py +7 -7
  21. snowflake/ml/_internal/utils/pkg_version_utils.py +11 -11
  22. snowflake/ml/_internal/utils/query_result_checker.py +4 -4
  23. snowflake/ml/_internal/utils/snowflake_env.py +28 -6
  24. snowflake/ml/_internal/utils/snowpark_dataframe_utils.py +2 -2
  25. snowflake/ml/_internal/utils/sql_identifier.py +3 -3
  26. snowflake/ml/_internal/utils/table_manager.py +9 -9
  27. snowflake/ml/data/_internal/arrow_ingestor.py +7 -7
  28. snowflake/ml/data/data_connector.py +15 -36
  29. snowflake/ml/data/data_ingestor.py +4 -15
  30. snowflake/ml/data/data_source.py +2 -2
  31. snowflake/ml/data/ingestor_utils.py +3 -3
  32. snowflake/ml/data/torch_utils.py +5 -5
  33. snowflake/ml/dataset/dataset.py +11 -11
  34. snowflake/ml/dataset/dataset_metadata.py +8 -8
  35. snowflake/ml/dataset/dataset_reader.py +7 -7
  36. snowflake/ml/feature_store/__init__.py +1 -1
  37. snowflake/ml/feature_store/access_manager.py +7 -7
  38. snowflake/ml/feature_store/entity.py +6 -6
  39. snowflake/ml/feature_store/examples/airline_features/entities.py +1 -3
  40. snowflake/ml/feature_store/examples/airline_features/features/plane_features.py +1 -3
  41. snowflake/ml/feature_store/examples/airline_features/features/weather_features.py +1 -3
  42. snowflake/ml/feature_store/examples/citibike_trip_features/entities.py +1 -3
  43. snowflake/ml/feature_store/examples/citibike_trip_features/features/station_feature.py +1 -3
  44. snowflake/ml/feature_store/examples/citibike_trip_features/features/trip_feature.py +1 -3
  45. snowflake/ml/feature_store/examples/example_helper.py +16 -16
  46. snowflake/ml/feature_store/examples/new_york_taxi_features/entities.py +1 -3
  47. snowflake/ml/feature_store/examples/new_york_taxi_features/features/location_features.py +1 -3
  48. snowflake/ml/feature_store/examples/new_york_taxi_features/features/trip_features.py +1 -3
  49. snowflake/ml/feature_store/examples/wine_quality_features/entities.py +1 -3
  50. snowflake/ml/feature_store/examples/wine_quality_features/features/managed_wine_features.py +1 -3
  51. snowflake/ml/feature_store/examples/wine_quality_features/features/static_wine_features.py +1 -3
  52. snowflake/ml/feature_store/feature_store.py +52 -64
  53. snowflake/ml/feature_store/feature_view.py +24 -24
  54. snowflake/ml/fileset/embedded_stage_fs.py +5 -5
  55. snowflake/ml/fileset/fileset.py +5 -5
  56. snowflake/ml/fileset/sfcfs.py +13 -13
  57. snowflake/ml/fileset/stage_fs.py +15 -15
  58. snowflake/ml/jobs/_utils/constants.py +1 -1
  59. snowflake/ml/jobs/_utils/interop_utils.py +10 -10
  60. snowflake/ml/jobs/_utils/payload_utils.py +45 -46
  61. snowflake/ml/jobs/_utils/scripts/get_instance_ip.py +4 -4
  62. snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +8 -5
  63. snowflake/ml/jobs/_utils/scripts/signal_workers.py +8 -8
  64. snowflake/ml/jobs/_utils/spec_utils.py +18 -29
  65. snowflake/ml/jobs/_utils/types.py +2 -2
  66. snowflake/ml/jobs/decorators.py +10 -5
  67. snowflake/ml/jobs/job.py +87 -30
  68. snowflake/ml/jobs/manager.py +86 -56
  69. snowflake/ml/lineage/lineage_node.py +5 -5
  70. snowflake/ml/model/_client/model/model_impl.py +3 -3
  71. snowflake/ml/model/_client/model/model_version_impl.py +103 -35
  72. snowflake/ml/model/_client/ops/metadata_ops.py +7 -7
  73. snowflake/ml/model/_client/ops/model_ops.py +41 -41
  74. snowflake/ml/model/_client/ops/service_ops.py +217 -32
  75. snowflake/ml/model/_client/service/model_deployment_spec.py +359 -65
  76. snowflake/ml/model/_client/service/model_deployment_spec_schema.py +69 -24
  77. snowflake/ml/model/_client/sql/model.py +8 -8
  78. snowflake/ml/model/_client/sql/model_version.py +26 -26
  79. snowflake/ml/model/_client/sql/service.py +17 -26
  80. snowflake/ml/model/_client/sql/stage.py +2 -2
  81. snowflake/ml/model/_client/sql/tag.py +6 -6
  82. snowflake/ml/model/_model_composer/model_composer.py +58 -32
  83. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +20 -16
  84. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +14 -13
  85. snowflake/ml/model/_model_composer/model_method/model_method.py +3 -3
  86. snowflake/ml/model/_packager/model_env/model_env.py +28 -25
  87. snowflake/ml/model/_packager/model_handler.py +4 -4
  88. snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
  89. snowflake/ml/model/_packager/model_handlers/_utils.py +47 -5
  90. snowflake/ml/model/_packager/model_handlers/catboost.py +5 -5
  91. snowflake/ml/model/_packager/model_handlers/custom.py +9 -5
  92. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +7 -21
  93. snowflake/ml/model/_packager/model_handlers/keras.py +4 -4
  94. snowflake/ml/model/_packager/model_handlers/lightgbm.py +4 -14
  95. snowflake/ml/model/_packager/model_handlers/mlflow.py +3 -3
  96. snowflake/ml/model/_packager/model_handlers/pytorch.py +5 -6
  97. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +5 -5
  98. snowflake/ml/model/_packager/model_handlers/sklearn.py +104 -46
  99. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +3 -3
  100. snowflake/ml/model/_packager/model_handlers/tensorflow.py +11 -8
  101. snowflake/ml/model/_packager/model_handlers/torchscript.py +6 -6
  102. snowflake/ml/model/_packager/model_handlers/xgboost.py +21 -22
  103. snowflake/ml/model/_packager/model_meta/model_blob_meta.py +2 -2
  104. snowflake/ml/model/_packager/model_meta/model_meta.py +39 -38
  105. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +14 -11
  106. snowflake/ml/model/_packager/model_meta_migrator/base_migrator.py +3 -3
  107. snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -3
  108. snowflake/ml/model/_packager/model_meta_migrator/migrator_v1.py +4 -4
  109. snowflake/ml/model/_packager/model_packager.py +11 -9
  110. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +32 -1
  111. snowflake/ml/model/_packager/model_runtime/model_runtime.py +4 -2
  112. snowflake/ml/model/_signatures/core.py +16 -24
  113. snowflake/ml/model/_signatures/dmatrix_handler.py +17 -4
  114. snowflake/ml/model/_signatures/utils.py +6 -6
  115. snowflake/ml/model/custom_model.py +24 -11
  116. snowflake/ml/model/model_signature.py +12 -23
  117. snowflake/ml/model/models/huggingface_pipeline.py +7 -4
  118. snowflake/ml/model/type_hints.py +3 -3
  119. snowflake/ml/modeling/_internal/estimator_utils.py +7 -7
  120. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +6 -6
  121. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +7 -7
  122. snowflake/ml/modeling/_internal/model_specifications.py +8 -10
  123. snowflake/ml/modeling/_internal/model_trainer.py +5 -5
  124. snowflake/ml/modeling/_internal/model_trainer_builder.py +6 -6
  125. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +30 -30
  126. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +13 -13
  127. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +31 -31
  128. snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +19 -19
  129. snowflake/ml/modeling/_internal/transformer_protocols.py +17 -17
  130. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +9 -1
  131. snowflake/ml/modeling/cluster/affinity_propagation.py +9 -1
  132. snowflake/ml/modeling/cluster/agglomerative_clustering.py +9 -1
  133. snowflake/ml/modeling/cluster/birch.py +9 -1
  134. snowflake/ml/modeling/cluster/bisecting_k_means.py +9 -1
  135. snowflake/ml/modeling/cluster/dbscan.py +9 -1
  136. snowflake/ml/modeling/cluster/feature_agglomeration.py +9 -1
  137. snowflake/ml/modeling/cluster/k_means.py +9 -1
  138. snowflake/ml/modeling/cluster/mean_shift.py +9 -1
  139. snowflake/ml/modeling/cluster/mini_batch_k_means.py +9 -1
  140. snowflake/ml/modeling/cluster/optics.py +9 -1
  141. snowflake/ml/modeling/cluster/spectral_biclustering.py +9 -1
  142. snowflake/ml/modeling/cluster/spectral_clustering.py +9 -1
  143. snowflake/ml/modeling/cluster/spectral_coclustering.py +9 -1
  144. snowflake/ml/modeling/compose/column_transformer.py +9 -1
  145. snowflake/ml/modeling/compose/transformed_target_regressor.py +9 -1
  146. snowflake/ml/modeling/covariance/elliptic_envelope.py +9 -1
  147. snowflake/ml/modeling/covariance/empirical_covariance.py +9 -1
  148. snowflake/ml/modeling/covariance/graphical_lasso.py +9 -1
  149. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +9 -1
  150. snowflake/ml/modeling/covariance/ledoit_wolf.py +9 -1
  151. snowflake/ml/modeling/covariance/min_cov_det.py +9 -1
  152. snowflake/ml/modeling/covariance/oas.py +9 -1
  153. snowflake/ml/modeling/covariance/shrunk_covariance.py +9 -1
  154. snowflake/ml/modeling/decomposition/dictionary_learning.py +9 -1
  155. snowflake/ml/modeling/decomposition/factor_analysis.py +9 -1
  156. snowflake/ml/modeling/decomposition/fast_ica.py +9 -1
  157. snowflake/ml/modeling/decomposition/incremental_pca.py +9 -1
  158. snowflake/ml/modeling/decomposition/kernel_pca.py +9 -1
  159. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +9 -1
  160. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +9 -1
  161. snowflake/ml/modeling/decomposition/pca.py +9 -1
  162. snowflake/ml/modeling/decomposition/sparse_pca.py +9 -1
  163. snowflake/ml/modeling/decomposition/truncated_svd.py +9 -1
  164. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +9 -1
  165. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +9 -1
  166. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +9 -1
  167. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +9 -1
  168. snowflake/ml/modeling/ensemble/bagging_classifier.py +9 -1
  169. snowflake/ml/modeling/ensemble/bagging_regressor.py +9 -1
  170. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +9 -1
  171. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +9 -1
  172. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +9 -1
  173. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +9 -1
  174. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +9 -1
  175. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +9 -1
  176. snowflake/ml/modeling/ensemble/isolation_forest.py +9 -1
  177. snowflake/ml/modeling/ensemble/random_forest_classifier.py +9 -1
  178. snowflake/ml/modeling/ensemble/random_forest_regressor.py +9 -1
  179. snowflake/ml/modeling/ensemble/stacking_regressor.py +9 -1
  180. snowflake/ml/modeling/ensemble/voting_classifier.py +9 -1
  181. snowflake/ml/modeling/ensemble/voting_regressor.py +9 -1
  182. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +9 -1
  183. snowflake/ml/modeling/feature_selection/select_fdr.py +9 -1
  184. snowflake/ml/modeling/feature_selection/select_fpr.py +9 -1
  185. snowflake/ml/modeling/feature_selection/select_fwe.py +9 -1
  186. snowflake/ml/modeling/feature_selection/select_k_best.py +9 -1
  187. snowflake/ml/modeling/feature_selection/select_percentile.py +9 -1
  188. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +9 -1
  189. snowflake/ml/modeling/feature_selection/variance_threshold.py +9 -1
  190. snowflake/ml/modeling/framework/_utils.py +10 -10
  191. snowflake/ml/modeling/framework/base.py +32 -32
  192. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +9 -1
  193. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +9 -1
  194. snowflake/ml/modeling/impute/__init__.py +1 -1
  195. snowflake/ml/modeling/impute/iterative_imputer.py +9 -1
  196. snowflake/ml/modeling/impute/knn_imputer.py +9 -1
  197. snowflake/ml/modeling/impute/missing_indicator.py +9 -1
  198. snowflake/ml/modeling/impute/simple_imputer.py +5 -5
  199. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +9 -1
  200. snowflake/ml/modeling/kernel_approximation/nystroem.py +9 -1
  201. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +9 -1
  202. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +9 -1
  203. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +9 -1
  204. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +9 -1
  205. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +9 -1
  206. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +9 -1
  207. snowflake/ml/modeling/linear_model/ard_regression.py +9 -1
  208. snowflake/ml/modeling/linear_model/bayesian_ridge.py +9 -1
  209. snowflake/ml/modeling/linear_model/elastic_net.py +9 -1
  210. snowflake/ml/modeling/linear_model/elastic_net_cv.py +9 -1
  211. snowflake/ml/modeling/linear_model/gamma_regressor.py +9 -1
  212. snowflake/ml/modeling/linear_model/huber_regressor.py +9 -1
  213. snowflake/ml/modeling/linear_model/lars.py +9 -1
  214. snowflake/ml/modeling/linear_model/lars_cv.py +9 -1
  215. snowflake/ml/modeling/linear_model/lasso.py +9 -1
  216. snowflake/ml/modeling/linear_model/lasso_cv.py +9 -1
  217. snowflake/ml/modeling/linear_model/lasso_lars.py +9 -1
  218. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +9 -1
  219. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +9 -1
  220. snowflake/ml/modeling/linear_model/linear_regression.py +9 -1
  221. snowflake/ml/modeling/linear_model/logistic_regression.py +9 -1
  222. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +9 -1
  223. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +9 -1
  224. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +9 -1
  225. snowflake/ml/modeling/linear_model/multi_task_lasso.py +9 -1
  226. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +9 -1
  227. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +9 -1
  228. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +9 -1
  229. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +9 -1
  230. snowflake/ml/modeling/linear_model/perceptron.py +9 -1
  231. snowflake/ml/modeling/linear_model/poisson_regressor.py +9 -1
  232. snowflake/ml/modeling/linear_model/ransac_regressor.py +9 -1
  233. snowflake/ml/modeling/linear_model/ridge.py +9 -1
  234. snowflake/ml/modeling/linear_model/ridge_classifier.py +9 -1
  235. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +9 -1
  236. snowflake/ml/modeling/linear_model/ridge_cv.py +9 -1
  237. snowflake/ml/modeling/linear_model/sgd_classifier.py +9 -1
  238. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +9 -1
  239. snowflake/ml/modeling/linear_model/sgd_regressor.py +9 -1
  240. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +9 -1
  241. snowflake/ml/modeling/linear_model/tweedie_regressor.py +9 -1
  242. snowflake/ml/modeling/manifold/isomap.py +9 -1
  243. snowflake/ml/modeling/manifold/mds.py +9 -1
  244. snowflake/ml/modeling/manifold/spectral_embedding.py +9 -1
  245. snowflake/ml/modeling/manifold/tsne.py +9 -1
  246. snowflake/ml/modeling/metrics/__init__.py +1 -1
  247. snowflake/ml/modeling/metrics/classification.py +39 -39
  248. snowflake/ml/modeling/metrics/metrics_utils.py +12 -12
  249. snowflake/ml/modeling/metrics/ranking.py +7 -7
  250. snowflake/ml/modeling/metrics/regression.py +13 -13
  251. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +9 -1
  252. snowflake/ml/modeling/mixture/gaussian_mixture.py +9 -1
  253. snowflake/ml/modeling/model_selection/__init__.py +1 -1
  254. snowflake/ml/modeling/model_selection/grid_search_cv.py +7 -7
  255. snowflake/ml/modeling/model_selection/randomized_search_cv.py +7 -7
  256. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +9 -1
  257. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +9 -1
  258. snowflake/ml/modeling/multiclass/output_code_classifier.py +9 -1
  259. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +9 -1
  260. snowflake/ml/modeling/naive_bayes/categorical_nb.py +9 -1
  261. snowflake/ml/modeling/naive_bayes/complement_nb.py +9 -1
  262. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +9 -1
  263. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +9 -1
  264. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +9 -1
  265. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +9 -1
  266. snowflake/ml/modeling/neighbors/kernel_density.py +9 -1
  267. snowflake/ml/modeling/neighbors/local_outlier_factor.py +9 -1
  268. snowflake/ml/modeling/neighbors/nearest_centroid.py +9 -1
  269. snowflake/ml/modeling/neighbors/nearest_neighbors.py +9 -1
  270. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +9 -1
  271. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +9 -1
  272. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +9 -1
  273. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +9 -1
  274. snowflake/ml/modeling/neural_network/mlp_classifier.py +9 -1
  275. snowflake/ml/modeling/neural_network/mlp_regressor.py +9 -1
  276. snowflake/ml/modeling/pipeline/__init__.py +1 -1
  277. snowflake/ml/modeling/pipeline/pipeline.py +18 -18
  278. snowflake/ml/modeling/preprocessing/__init__.py +1 -1
  279. snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +13 -13
  280. snowflake/ml/modeling/preprocessing/max_abs_scaler.py +4 -4
  281. snowflake/ml/modeling/preprocessing/min_max_scaler.py +8 -8
  282. snowflake/ml/modeling/preprocessing/normalizer.py +0 -1
  283. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +28 -28
  284. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +9 -9
  285. snowflake/ml/modeling/preprocessing/polynomial_features.py +9 -1
  286. snowflake/ml/modeling/preprocessing/robust_scaler.py +7 -7
  287. snowflake/ml/modeling/preprocessing/standard_scaler.py +5 -5
  288. snowflake/ml/modeling/semi_supervised/label_propagation.py +9 -1
  289. snowflake/ml/modeling/semi_supervised/label_spreading.py +9 -1
  290. snowflake/ml/modeling/svm/linear_svc.py +9 -1
  291. snowflake/ml/modeling/svm/linear_svr.py +9 -1
  292. snowflake/ml/modeling/svm/nu_svc.py +9 -1
  293. snowflake/ml/modeling/svm/nu_svr.py +9 -1
  294. snowflake/ml/modeling/svm/svc.py +9 -1
  295. snowflake/ml/modeling/svm/svr.py +9 -1
  296. snowflake/ml/modeling/tree/decision_tree_classifier.py +9 -1
  297. snowflake/ml/modeling/tree/decision_tree_regressor.py +9 -1
  298. snowflake/ml/modeling/tree/extra_tree_classifier.py +9 -1
  299. snowflake/ml/modeling/tree/extra_tree_regressor.py +9 -1
  300. snowflake/ml/modeling/xgboost/xgb_classifier.py +9 -1
  301. snowflake/ml/modeling/xgboost/xgb_regressor.py +9 -1
  302. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +9 -1
  303. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +9 -1
  304. snowflake/ml/monitoring/_client/model_monitor_sql_client.py +26 -26
  305. snowflake/ml/monitoring/_manager/model_monitor_manager.py +5 -5
  306. snowflake/ml/monitoring/entities/model_monitor_config.py +6 -6
  307. snowflake/ml/monitoring/explain_visualize.py +286 -0
  308. snowflake/ml/registry/_manager/model_manager.py +55 -32
  309. snowflake/ml/registry/registry.py +39 -31
  310. snowflake/ml/utils/authentication.py +2 -2
  311. snowflake/ml/utils/connection_params.py +5 -5
  312. snowflake/ml/utils/sparse.py +5 -4
  313. snowflake/ml/utils/sql_client.py +1 -2
  314. snowflake/ml/version.py +2 -1
  315. {snowflake_ml_python-1.8.2.dist-info → snowflake_ml_python-1.8.4.dist-info}/METADATA +55 -14
  316. snowflake_ml_python-1.8.4.dist-info/RECORD +419 -0
  317. {snowflake_ml_python-1.8.2.dist-info → snowflake_ml_python-1.8.4.dist-info}/WHEEL +1 -1
  318. snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +0 -1
  319. snowflake/ml/modeling/_internal/constants.py +0 -2
  320. snowflake_ml_python-1.8.2.dist-info/RECORD +0 -420
  321. {snowflake_ml_python-1.8.2.dist-info → snowflake_ml_python-1.8.4.dist-info}/licenses/LICENSE.txt +0 -0
  322. {snowflake_ml_python-1.8.2.dist-info → snowflake_ml_python-1.8.4.dist-info}/top_level.txt +0 -0
@@ -11,7 +11,7 @@ import cloudpickle as cp
11
11
  import numpy as np
12
12
  import pandas as pd
13
13
  from numpy import typing as npt
14
-
14
+ from packaging import version
15
15
 
16
16
  import numpy
17
17
  import sklearn
@@ -60,6 +60,14 @@ DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
60
60
 
61
61
  INFER_SIGNATURE_MAX_ROWS = 100
62
62
 
63
+ SKLEARN_LOWER, SKLEARN_UPPER = ('1.4', '1.6')
64
+ # Modeling library estimators require a smaller sklearn version range.
65
+ if not version.Version(SKLEARN_LOWER) <= version.Version(sklearn.__version__) < version.Version(SKLEARN_UPPER):
66
+ raise Exception(
67
+ f"To use the modeling library, install scikit-learn version >= {SKLEARN_LOWER} and < {SKLEARN_UPPER}"
68
+ )
69
+
70
+
63
71
  class RadiusNeighborsClassifier(BaseTransformer):
64
72
  r"""Classifier implementing a vote among neighbors within a given radius
65
73
  For more details on this class, see [sklearn.neighbors.RadiusNeighborsClassifier]
@@ -11,7 +11,7 @@ import cloudpickle as cp
11
11
  import numpy as np
12
12
  import pandas as pd
13
13
  from numpy import typing as npt
14
-
14
+ from packaging import version
15
15
 
16
16
  import numpy
17
17
  import sklearn
@@ -60,6 +60,14 @@ DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
60
60
 
61
61
  INFER_SIGNATURE_MAX_ROWS = 100
62
62
 
63
+ SKLEARN_LOWER, SKLEARN_UPPER = ('1.4', '1.6')
64
+ # Modeling library estimators require a smaller sklearn version range.
65
+ if not version.Version(SKLEARN_LOWER) <= version.Version(sklearn.__version__) < version.Version(SKLEARN_UPPER):
66
+ raise Exception(
67
+ f"To use the modeling library, install scikit-learn version >= {SKLEARN_LOWER} and < {SKLEARN_UPPER}"
68
+ )
69
+
70
+
63
71
  class RadiusNeighborsRegressor(BaseTransformer):
64
72
  r"""Regression based on neighbors within a fixed radius
65
73
  For more details on this class, see [sklearn.neighbors.RadiusNeighborsRegressor]
@@ -11,7 +11,7 @@ import cloudpickle as cp
11
11
  import numpy as np
12
12
  import pandas as pd
13
13
  from numpy import typing as npt
14
-
14
+ from packaging import version
15
15
 
16
16
  import numpy
17
17
  import sklearn
@@ -60,6 +60,14 @@ DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
60
60
 
61
61
  INFER_SIGNATURE_MAX_ROWS = 100
62
62
 
63
+ SKLEARN_LOWER, SKLEARN_UPPER = ('1.4', '1.6')
64
+ # Modeling library estimators require a smaller sklearn version range.
65
+ if not version.Version(SKLEARN_LOWER) <= version.Version(sklearn.__version__) < version.Version(SKLEARN_UPPER):
66
+ raise Exception(
67
+ f"To use the modeling library, install scikit-learn version >= {SKLEARN_LOWER} and < {SKLEARN_UPPER}"
68
+ )
69
+
70
+
63
71
  class BernoulliRBM(BaseTransformer):
64
72
  r"""Bernoulli Restricted Boltzmann Machine (RBM)
65
73
  For more details on this class, see [sklearn.neural_network.BernoulliRBM]
@@ -11,7 +11,7 @@ import cloudpickle as cp
11
11
  import numpy as np
12
12
  import pandas as pd
13
13
  from numpy import typing as npt
14
-
14
+ from packaging import version
15
15
 
16
16
  import numpy
17
17
  import sklearn
@@ -60,6 +60,14 @@ DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
60
60
 
61
61
  INFER_SIGNATURE_MAX_ROWS = 100
62
62
 
63
+ SKLEARN_LOWER, SKLEARN_UPPER = ('1.4', '1.6')
64
+ # Modeling library estimators require a smaller sklearn version range.
65
+ if not version.Version(SKLEARN_LOWER) <= version.Version(sklearn.__version__) < version.Version(SKLEARN_UPPER):
66
+ raise Exception(
67
+ f"To use the modeling library, install scikit-learn version >= {SKLEARN_LOWER} and < {SKLEARN_UPPER}"
68
+ )
69
+
70
+
63
71
  class MLPClassifier(BaseTransformer):
64
72
  r"""Multi-layer Perceptron classifier
65
73
  For more details on this class, see [sklearn.neural_network.MLPClassifier]
@@ -11,7 +11,7 @@ import cloudpickle as cp
11
11
  import numpy as np
12
12
  import pandas as pd
13
13
  from numpy import typing as npt
14
-
14
+ from packaging import version
15
15
 
16
16
  import numpy
17
17
  import sklearn
@@ -60,6 +60,14 @@ DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
60
60
 
61
61
  INFER_SIGNATURE_MAX_ROWS = 100
62
62
 
63
+ SKLEARN_LOWER, SKLEARN_UPPER = ('1.4', '1.6')
64
+ # Modeling library estimators require a smaller sklearn version range.
65
+ if not version.Version(SKLEARN_LOWER) <= version.Version(sklearn.__version__) < version.Version(SKLEARN_UPPER):
66
+ raise Exception(
67
+ f"To use the modeling library, install scikit-learn version >= {SKLEARN_LOWER} and < {SKLEARN_UPPER}"
68
+ )
69
+
70
+
63
71
  class MLPRegressor(BaseTransformer):
64
72
  r"""Multi-layer Perceptron regressor
65
73
  For more details on this class, see [sklearn.neural_network.MLPRegressor]
@@ -2,7 +2,7 @@ import os
2
2
 
3
3
  from snowflake.ml._internal import init_utils
4
4
 
5
- pkg_dir = os.path.dirname(os.path.abspath(__file__))
5
+ pkg_dir = os.path.dirname(__file__)
6
6
  pkg_name = __name__
7
7
  exportable_classes = init_utils.fetch_classes_from_modules_in_pkg_dir(pkg_dir=pkg_dir, pkg_name=pkg_name)
8
8
  for k, v in exportable_classes.items():
@@ -4,7 +4,7 @@ import os
4
4
  import posixpath
5
5
  import tempfile
6
6
  from itertools import chain
7
- from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
7
+ from typing import Any, Callable, Optional, Union
8
8
 
9
9
  import cloudpickle as cp
10
10
  import numpy as np
@@ -63,7 +63,7 @@ def has_callable_attr(obj: object, attr: str) -> bool:
63
63
  return callable(getattr(obj, attr, None))
64
64
 
65
65
 
66
- def _get_column_indices(all_columns: List[str], target_columns: List[str]) -> List[int]:
66
+ def _get_column_indices(all_columns: list[str], target_columns: list[str]) -> list[int]:
67
67
  """
68
68
  Extract the indices of the target_columns from all_columns.
69
69
 
@@ -96,7 +96,7 @@ def _get_column_indices(all_columns: List[str], target_columns: List[str]) -> Li
96
96
 
97
97
 
98
98
  class Pipeline(base.BaseTransformer):
99
- def __init__(self, steps: List[Tuple[str, Any]]) -> None:
99
+ def __init__(self, steps: list[tuple[str, Any]]) -> None:
100
100
  """
101
101
  Pipeline of transforms.
102
102
 
@@ -119,14 +119,14 @@ class Pipeline(base.BaseTransformer):
119
119
  # to only work with list of steps ending with an estimator or a dummy estimator like SKLearn?
120
120
  self._is_final_step_estimator = Pipeline._is_estimator(steps[-1][1])
121
121
  self._is_fitted = False
122
- self._feature_names_in: List[np.ndarray[Any, np.dtype[Any]]] = []
123
- self._n_features_in: List[int] = []
124
- self._transformers_to_input_indices: Dict[str, List[int]] = {}
122
+ self._feature_names_in: list[np.ndarray[Any, np.dtype[Any]]] = []
123
+ self._n_features_in: list[int] = []
124
+ self._transformers_to_input_indices: dict[str, list[int]] = {}
125
125
  self._modifies_label_or_sample_weight = True
126
126
 
127
- self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
127
+ self._model_signature_dict: Optional[dict[str, ModelSignature]] = None
128
128
 
129
- deps: Set[str] = {f"pandas=={pd.__version__}", f"scikit-learn=={skversion}"}
129
+ deps: set[str] = {f"pandas=={pd.__version__}", f"scikit-learn=={skversion}"}
130
130
  for _, obj in steps:
131
131
  if isinstance(obj, base.BaseTransformer):
132
132
  deps = deps | set(obj._get_dependencies())
@@ -146,10 +146,10 @@ class Pipeline(base.BaseTransformer):
146
146
  def _is_transformer(obj: object) -> bool:
147
147
  return has_callable_attr(obj, "fit") and has_callable_attr(obj, "transform")
148
148
 
149
- def _get_transformers(self) -> List[Tuple[str, Any]]:
149
+ def _get_transformers(self) -> list[tuple[str, Any]]:
150
150
  return self.steps[:-1] if self._is_final_step_estimator else self.steps
151
151
 
152
- def _get_estimator(self) -> Optional[Tuple[str, Any]]:
152
+ def _get_estimator(self) -> Optional[tuple[str, Any]]:
153
153
  return self.steps[-1] if self._is_final_step_estimator else None
154
154
 
155
155
  def _validate_steps(self) -> None:
@@ -215,7 +215,7 @@ class Pipeline(base.BaseTransformer):
215
215
  processed_cols = set(chain.from_iterable([trans.get_input_cols() for (_, trans) in self._get_transformers()]))
216
216
  return len(target_cols & processed_cols) > 0
217
217
 
218
- def _get_sanitized_list_of_columns(self, columns: List[str]) -> List[str]:
218
+ def _get_sanitized_list_of_columns(self, columns: list[str]) -> list[str]:
219
219
  """
220
220
  Removes the label and sample_weight columns from the input list of columns and returns the results for the
221
221
  purpous of computing column indices for SKLearn ColumnTransformer objects.
@@ -237,7 +237,7 @@ class Pipeline(base.BaseTransformer):
237
237
 
238
238
  return [c for c in columns if c not in target_cols]
239
239
 
240
- def _append_step_feature_consumption_info(self, step_name: str, all_cols: List[str], input_cols: List[str]) -> None:
240
+ def _append_step_feature_consumption_info(self, step_name: str, all_cols: list[str], input_cols: list[str]) -> None:
241
241
  if self._modifies_label_or_sample_weight:
242
242
  all_cols = self._get_sanitized_list_of_columns(all_cols)
243
243
  self._feature_names_in.append(np.asarray(all_cols, dtype=object))
@@ -269,7 +269,7 @@ class Pipeline(base.BaseTransformer):
269
269
 
270
270
  return transformed_dataset
271
271
 
272
- def _upload_model_to_stage(self, stage_name: str, estimator: object, session: Session) -> Tuple[str, str]:
272
+ def _upload_model_to_stage(self, stage_name: str, estimator: object, session: Session) -> tuple[str, str]:
273
273
  """
274
274
  Util method to pickle and upload the model to a temp Snowflake stage.
275
275
 
@@ -331,10 +331,10 @@ class Pipeline(base.BaseTransformer):
331
331
 
332
332
  def pipeline_within_one_sproc(
333
333
  session: Session,
334
- sql_queries: List[str],
334
+ sql_queries: list[str],
335
335
  stage_estimator_file_name: str,
336
336
  stage_result_file_name: str,
337
- sproc_statement_params: Dict[str, str],
337
+ sproc_statement_params: dict[str, str],
338
338
  ) -> str:
339
339
  import os
340
340
 
@@ -774,7 +774,7 @@ class Pipeline(base.BaseTransformer):
774
774
 
775
775
  return ct
776
776
 
777
- def _get_label_cols(self) -> List[str]:
777
+ def _get_label_cols(self) -> list[str]:
778
778
  """Util function to get the label columns from the pipeline.
779
779
  The label column is only present in the estimator
780
780
 
@@ -885,7 +885,7 @@ class Pipeline(base.BaseTransformer):
885
885
 
886
886
  return pipeline.Pipeline(steps=sksteps)
887
887
 
888
- def _get_dependencies(self) -> List[str]:
888
+ def _get_dependencies(self) -> list[str]:
889
889
  return self._deps
890
890
 
891
891
  def _generate_model_signatures(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> None:
@@ -919,7 +919,7 @@ class Pipeline(base.BaseTransformer):
919
919
  )
920
920
 
921
921
  @property
922
- def model_signatures(self) -> Dict[str, ModelSignature]:
922
+ def model_signatures(self) -> dict[str, ModelSignature]:
923
923
  if self._model_signature_dict is None:
924
924
  raise exceptions.SnowflakeMLException(
925
925
  error_code=error_codes.INVALID_ATTRIBUTE,
@@ -2,7 +2,7 @@ import os
2
2
 
3
3
  from snowflake.ml._internal import init_utils
4
4
 
5
- pkg_dir = os.path.dirname(os.path.abspath(__file__))
5
+ pkg_dir = os.path.dirname(__file__)
6
6
  pkg_name = __name__
7
7
  exportable_classes = init_utils.fetch_classes_from_modules_in_pkg_dir(pkg_dir=pkg_dir, pkg_name=pkg_name)
8
8
  for k, v in exportable_classes.items():
@@ -2,7 +2,7 @@
2
2
  from __future__ import annotations
3
3
 
4
4
  from itertools import chain
5
- from typing import Dict, Iterable, List, Optional, Union, cast
5
+ from typing import Iterable, Optional, Union, cast
6
6
 
7
7
  import numpy as np
8
8
  import numpy.typing as npt
@@ -104,7 +104,7 @@ class KBinsDiscretizer(base.BaseTransformer):
104
104
  def __init__(
105
105
  self,
106
106
  *,
107
- n_bins: Union[int, List[int]] = 5,
107
+ n_bins: Union[int, list[int]] = 5,
108
108
  encode: str = "onehot",
109
109
  strategy: str = "quantile",
110
110
  input_cols: Optional[Union[str, Iterable[str]]] = None,
@@ -229,7 +229,7 @@ class KBinsDiscretizer(base.BaseTransformer):
229
229
  # https://docs.google.com/document/d/1cilfCCtKYv6HvHqaqdZxfHAvQ0gg-t1AM8KYCQtJiLE/edit
230
230
  agg_queries = []
231
231
  for idx, col_name in enumerate(self.input_cols):
232
- percentiles = np.linspace(0, 1, cast(List[int], self.n_bins)[idx] + 1)
232
+ percentiles = np.linspace(0, 1, cast(list[int], self.n_bins)[idx] + 1)
233
233
  for i, pct in enumerate(percentiles.tolist()):
234
234
  agg_queries.append(F.percentile_cont(pct).within_group(col_name).alias(f"{col_name}_pct_{i}"))
235
235
  state_df = dataset.agg(agg_queries)
@@ -246,7 +246,7 @@ class KBinsDiscretizer(base.BaseTransformer):
246
246
  self.bin_edges_ = np.zeros(len(self.input_cols), dtype=object)
247
247
  self.n_bins_ = np.zeros(len(self.input_cols), dtype=np.int_)
248
248
  start = 0
249
- for i, b in enumerate(cast(List[int], self.n_bins)):
249
+ for i, b in enumerate(cast(list[int], self.n_bins)):
250
250
  self.bin_edges_[i] = decimal_to_float(state[start : start + b + 1])
251
251
  start += b + 1
252
252
  self.n_bins_[i] = len(self.bin_edges_[i]) - 1
@@ -275,7 +275,7 @@ class KBinsDiscretizer(base.BaseTransformer):
275
275
  # 2. Populate internal state variables
276
276
  self.bin_edges_ = np.zeros(len(self.input_cols), dtype=object)
277
277
  self.n_bins_ = np.zeros(len(self.input_cols), dtype=np.int_)
278
- for i, b in enumerate(cast(List[int], self.n_bins)):
278
+ for i, b in enumerate(cast(list[int], self.n_bins)):
279
279
  self.bin_edges_[i] = np.linspace(state[i * 2], state[i * 2 + 1], b + 1)
280
280
  self.n_bins_[i] = len(self.bin_edges_[i]) - 1
281
281
 
@@ -345,7 +345,7 @@ class KBinsDiscretizer(base.BaseTransformer):
345
345
  session=dataset._session,
346
346
  statement_params=telemetry.get_statement_params(base.PROJECT, base.SUBPROJECT, self.__class__.__name__),
347
347
  )
348
- def vec_bucketize_temp(x: T.PandasSeries[float], boarders: T.PandasSeries[List[float]]) -> T.PandasSeries[int]:
348
+ def vec_bucketize_temp(x: T.PandasSeries[float], boarders: T.PandasSeries[list[float]]) -> T.PandasSeries[int]:
349
349
  # NB: vectorized udf doesn't work well with const array arg, so we pass it in as a list via PandasSeries
350
350
  boarders = boarders[0]
351
351
  res = np.searchsorted(boarders[1:-1], x, side="right")
@@ -387,9 +387,9 @@ class KBinsDiscretizer(base.BaseTransformer):
387
387
  statement_params=telemetry.get_statement_params(base.PROJECT, base.SUBPROJECT, self.__class__.__name__),
388
388
  )
389
389
  def vec_bucketize_sparse_output_temp(
390
- x: T.PandasSeries[float], boarders: T.PandasSeries[List[float]]
391
- ) -> T.PandasSeries[Dict[str, int]]:
392
- res: List[Dict[str, int]] = []
390
+ x: T.PandasSeries[float], boarders: T.PandasSeries[list[float]]
391
+ ) -> T.PandasSeries[dict[str, int]]:
392
+ res: list[dict[str, int]] = []
393
393
  boarders = boarders[0]
394
394
  buckets = np.searchsorted(boarders[1:-1], x, side="right")
395
395
  assert isinstance(buckets, np.ndarray), f"expecting buckets to be numpy ndarray, got {type(buckets)}"
@@ -434,9 +434,9 @@ class KBinsDiscretizer(base.BaseTransformer):
434
434
  statement_params=telemetry.get_statement_params(base.PROJECT, base.SUBPROJECT, self.__class__.__name__),
435
435
  )
436
436
  def vec_bucketize_dense_output_temp(
437
- x: T.PandasSeries[float], boarders: T.PandasSeries[List[float]]
438
- ) -> T.PandasSeries[List[int]]:
439
- res: List[npt.NDArray[np.int32]] = []
437
+ x: T.PandasSeries[float], boarders: T.PandasSeries[list[float]]
438
+ ) -> T.PandasSeries[list[int]]:
439
+ res: list[npt.NDArray[np.int32]] = []
440
440
  boarders = boarders[0]
441
441
  buckets = np.searchsorted(boarders[1:-1], x, side="right")
442
442
  assert isinstance(buckets, np.ndarray), f"expecting buckets to be numpy ndarray, got {type(buckets)}"
@@ -491,7 +491,7 @@ class KBinsDiscretizer(base.BaseTransformer):
491
491
  else:
492
492
  return transformed_dataset
493
493
 
494
- def get_output_cols(self) -> List[str]:
494
+ def get_output_cols(self) -> list[str]:
495
495
  """
496
496
  Get output column names.
497
497
  Expand output column names for 'onehot-dense' encoding.
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env python3
2
- from typing import Dict, Iterable, List, Optional, Union
2
+ from typing import Iterable, Optional, Union
3
3
 
4
4
  import numpy as np
5
5
  import pandas as pd
@@ -88,10 +88,10 @@ class MaxAbsScaler(base.BaseTransformer):
88
88
  max_abs_: dict {column_name: value} or None
89
89
  Per feature maximum absolute value.
90
90
  """
91
- self.max_abs_: Dict[str, float] = {}
92
- self.scale_: Dict[str, float] = {}
91
+ self.max_abs_: dict[str, float] = {}
92
+ self.scale_: dict[str, float] = {}
93
93
 
94
- self.custom_states: List[str] = [
94
+ self.custom_states: list[str] = [
95
95
  "SQL>>>max(abs({col_name}))",
96
96
  ]
97
97
 
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env python3
2
- from typing import Dict, Iterable, List, Optional, Tuple, Union
2
+ from typing import Iterable, Optional, Union
3
3
 
4
4
  import numpy as np
5
5
  import pandas as pd
@@ -69,7 +69,7 @@ class MinMaxScaler(base.BaseTransformer):
69
69
  def __init__(
70
70
  self,
71
71
  *,
72
- feature_range: Tuple[float, float] = (0, 1),
72
+ feature_range: tuple[float, float] = (0, 1),
73
73
  clip: bool = False,
74
74
  input_cols: Optional[Union[str, Iterable[str]]] = None,
75
75
  output_cols: Optional[Union[str, Iterable[str]]] = None,
@@ -101,13 +101,13 @@ class MinMaxScaler(base.BaseTransformer):
101
101
  self.feature_range = feature_range
102
102
  self.clip = clip
103
103
 
104
- self.min_: Dict[str, float] = {}
105
- self.scale_: Dict[str, float] = {}
106
- self.data_min_: Dict[str, float] = {}
107
- self.data_max_: Dict[str, float] = {}
108
- self.data_range_: Dict[str, float] = {}
104
+ self.min_: dict[str, float] = {}
105
+ self.scale_: dict[str, float] = {}
106
+ self.data_min_: dict[str, float] = {}
107
+ self.data_max_: dict[str, float] = {}
108
+ self.data_range_: dict[str, float] = {}
109
109
 
110
- self.custom_states: List[str] = [_utils.NumericStatistics.MIN, _utils.NumericStatistics.MAX]
110
+ self.custom_states: list[str] = [_utils.NumericStatistics.MIN, _utils.NumericStatistics.MAX]
111
111
 
112
112
  super().__init__(drop_input_cols=drop_input_cols, custom_states=self.custom_states)
113
113
 
@@ -72,7 +72,6 @@ class Normalizer(base.BaseTransformer):
72
72
 
73
73
  This is a stateless transformer, so there is nothing to reset.
74
74
  """
75
- pass
76
75
 
77
76
  def _fit(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> "Normalizer":
78
77
  """
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python3
2
2
  import numbers
3
3
  import uuid
4
- from typing import Any, Dict, Iterable, List, Optional, Union
4
+ from typing import Any, Iterable, Optional, Union
5
5
 
6
6
  import numpy as np
7
7
  import numpy.typing as npt
@@ -214,7 +214,7 @@ class OneHotEncoder(base.BaseTransformer):
214
214
  def __init__(
215
215
  self,
216
216
  *,
217
- categories: Union[str, List[type_utils.LiteralNDArrayType], Dict[str, type_utils.LiteralNDArrayType]] = "auto",
217
+ categories: Union[str, list[type_utils.LiteralNDArrayType], dict[str, type_utils.LiteralNDArrayType]] = "auto",
218
218
  drop: Optional[Union[str, npt.ArrayLike]] = None,
219
219
  sparse: bool = False,
220
220
  handle_unknown: str = "error",
@@ -238,23 +238,23 @@ class OneHotEncoder(base.BaseTransformer):
238
238
  ) or self.min_frequency is not None
239
239
 
240
240
  # Fit state
241
- self.categories_: Dict[str, type_utils.LiteralNDArrayType] = {}
242
- self._categories_list: List[type_utils.LiteralNDArrayType] = []
241
+ self.categories_: dict[str, type_utils.LiteralNDArrayType] = {}
242
+ self._categories_list: list[type_utils.LiteralNDArrayType] = []
243
243
  self.drop_idx_: Optional[npt.NDArray[np.int_]] = None
244
244
  self._drop_idx_after_grouping: Optional[npt.NDArray[np.int_]] = None
245
- self._n_features_outs: List[int] = []
246
- self._snowpark_cols: Dict[str, List[str]] = dict()
245
+ self._n_features_outs: list[int] = []
246
+ self._snowpark_cols: dict[str, list[str]] = dict()
247
247
 
248
248
  # Fit state if output columns are set before fitting
249
- self._dense_output_cols_mappings: Dict[str, List[str]] = {}
250
- self._inferred_output_cols: List[str] = []
249
+ self._dense_output_cols_mappings: dict[str, list[str]] = {}
250
+ self._inferred_output_cols: list[str] = []
251
251
 
252
252
  self.set_input_cols(input_cols)
253
253
  self.set_output_cols(output_cols)
254
254
  self.set_passthrough_cols(passthrough_cols)
255
255
 
256
256
  @property
257
- def infrequent_categories_(self) -> List[Optional[type_utils.LiteralNDArrayType]]:
257
+ def infrequent_categories_(self) -> list[Optional[type_utils.LiteralNDArrayType]]:
258
258
  """Infrequent categories for each feature."""
259
259
  # raises an AttributeError if `_infrequent_indices` is not defined
260
260
  infrequent_indices = self._infrequent_indices
@@ -329,7 +329,7 @@ class OneHotEncoder(base.BaseTransformer):
329
329
  self._drop_idx_after_grouping = sklearn_encoder.drop_idx_
330
330
  self._n_features_outs = sklearn_encoder._n_features_outs
331
331
 
332
- _state_pandas_counts: List[pd.DataFrame] = []
332
+ _state_pandas_counts: list[pd.DataFrame] = []
333
333
  for idx, input_col in enumerate(self.input_cols):
334
334
  self.categories_[input_col] = self._categories_list[idx]
335
335
  _column_counts = (
@@ -362,7 +362,7 @@ class OneHotEncoder(base.BaseTransformer):
362
362
  self._n_features_outs = self._compute_n_features_outs()
363
363
  self._update_categories_state()
364
364
 
365
- def _fit_category_state(self, dataset: snowpark.DataFrame, return_counts: bool) -> Dict[str, Any]:
365
+ def _fit_category_state(self, dataset: snowpark.DataFrame, return_counts: bool) -> dict[str, Any]:
366
366
  """
367
367
  Get the number of samples, categories and (optional) category counts of dataset.
368
368
  Fitted categories are assigned to the object.
@@ -552,7 +552,7 @@ class OneHotEncoder(base.BaseTransformer):
552
552
  categories_pandas = categories_pandas.rename(columns={_STATE: categories_col})
553
553
 
554
554
  # {column_name: ndarray([category])}
555
- categories: Dict[str, type_utils.LiteralNDArrayType] = categories_pandas.set_index(_COLUMN_NAME).to_dict()[
555
+ categories: dict[str, type_utils.LiteralNDArrayType] = categories_pandas.set_index(_COLUMN_NAME).to_dict()[
556
556
  categories_col
557
557
  ]
558
558
  # Giving the original type back to categories.
@@ -769,7 +769,7 @@ class OneHotEncoder(base.BaseTransformer):
769
769
 
770
770
  state_pandas = self._state_pandas
771
771
 
772
- def map_encoded_value(row: pd.Series) -> Dict[str, Any]:
772
+ def map_encoded_value(row: pd.Series) -> dict[str, Any]:
773
773
  n_features_out = row[_N_FEATURES_OUT]
774
774
  encoding = row[_ENCODING]
775
775
  encoded_value = {str(encoding): 1, "array_length": n_features_out}
@@ -836,7 +836,7 @@ class OneHotEncoder(base.BaseTransformer):
836
836
  """
837
837
  state_pandas = self._state_pandas
838
838
 
839
- def map_encoded_value(row: pd.Series) -> List[int]:
839
+ def map_encoded_value(row: pd.Series) -> list[int]:
840
840
  n_features_out = row[_N_FEATURES_OUT]
841
841
  encoding = row[_ENCODING]
842
842
  encoded_value = [0] * n_features_out
@@ -934,7 +934,7 @@ class OneHotEncoder(base.BaseTransformer):
934
934
  packages=["numpy", "scikit-learn"],
935
935
  statement_params=telemetry.get_statement_params(base.PROJECT, base.SUBPROJECT, self.__class__.__name__),
936
936
  )
937
- def one_hot_encoder_sparse_transform(data: pd.DataFrame) -> List[List[Optional[Dict[Any, Any]]]]:
937
+ def one_hot_encoder_sparse_transform(data: pd.DataFrame) -> list[list[Optional[dict[Any, Any]]]]:
938
938
  data = data.replace({np.nan: None}) # fill NA with None as represented in `categories_`
939
939
  transformed_csr = encoder_sklearn.transform(data)
940
940
  transformed_coo = transformed_csr.tocoo()
@@ -943,7 +943,7 @@ class OneHotEncoder(base.BaseTransformer):
943
943
  transformed_vals = []
944
944
  for _, row in data.iterrows():
945
945
  base_encoding = 0
946
- row_transformed_vals: List[Optional[Dict[Any, Any]]] = []
946
+ row_transformed_vals: list[Optional[dict[Any, Any]]] = []
947
947
  for col_idx, val in row.items():
948
948
  if val in encoder_sklearn.categories_[col_idx] or encoder_sklearn.handle_unknown != "ignore":
949
949
  if col_idx > 0:
@@ -1101,7 +1101,7 @@ class OneHotEncoder(base.BaseTransformer):
1101
1101
  def _handle_unknown_in_transform(
1102
1102
  self,
1103
1103
  transformed_dataset: snowpark.DataFrame,
1104
- input_cols: Optional[List[str]] = None,
1104
+ input_cols: Optional[list[str]] = None,
1105
1105
  ) -> snowpark.DataFrame:
1106
1106
  """
1107
1107
  Handle unknown values in the transformed dataset.
@@ -1206,7 +1206,7 @@ class OneHotEncoder(base.BaseTransformer):
1206
1206
  if not self._infrequent_enabled:
1207
1207
  return drop_idx
1208
1208
 
1209
- default_to_infrequent: Optional[List[int]] = self._default_to_infrequent_mappings[feature_idx]
1209
+ default_to_infrequent: Optional[list[int]] = self._default_to_infrequent_mappings[feature_idx]
1210
1210
  if default_to_infrequent is None:
1211
1211
  return drop_idx
1212
1212
 
@@ -1346,7 +1346,7 @@ class OneHotEncoder(base.BaseTransformer):
1346
1346
  self.drop_idx_ = np.asarray(drop_idx_, dtype=object)
1347
1347
 
1348
1348
  def _fit_infrequent_category_mapping(
1349
- self, n_samples: int, category_counts: Dict[str, Dict[str, Dict[str, int]]]
1349
+ self, n_samples: int, category_counts: dict[str, dict[str, dict[str, int]]]
1350
1350
  ) -> None:
1351
1351
  """
1352
1352
  Fit infrequent categories.
@@ -1442,7 +1442,7 @@ class OneHotEncoder(base.BaseTransformer):
1442
1442
  output = np.flatnonzero(infrequent_mask)
1443
1443
  return output if output.size > 0 else None
1444
1444
 
1445
- def _compute_n_features_outs(self) -> List[int]:
1445
+ def _compute_n_features_outs(self) -> list[int]:
1446
1446
  """Compute the n_features_out for each input feature."""
1447
1447
  output = [len(cats) for cats in self._categories_list]
1448
1448
 
@@ -1463,7 +1463,7 @@ class OneHotEncoder(base.BaseTransformer):
1463
1463
 
1464
1464
  return output
1465
1465
 
1466
- def get_output_cols(self) -> List[str]:
1466
+ def get_output_cols(self) -> list[str]:
1467
1467
  """
1468
1468
  Output columns getter.
1469
1469
 
@@ -1472,7 +1472,7 @@ class OneHotEncoder(base.BaseTransformer):
1472
1472
  """
1473
1473
  return self._inferred_output_cols
1474
1474
 
1475
- def _get_inferred_output_cols(self) -> List[str]:
1475
+ def _get_inferred_output_cols(self) -> list[str]:
1476
1476
  """
1477
1477
  Get output column names meeting Snowflake requirements.
1478
1478
  Only useful when fitting a pandas dataframe.
@@ -1556,11 +1556,11 @@ class OneHotEncoder(base.BaseTransformer):
1556
1556
  sklearn_initial_keywords: Optional[Union[str, Iterable[str]]] = None,
1557
1557
  sklearn_unused_keywords: Optional[Union[str, Iterable[str]]] = None,
1558
1558
  snowml_only_keywords: Optional[Union[str, Iterable[str]]] = None,
1559
- sklearn_added_keyword_to_version_dict: Optional[Dict[str, str]] = None,
1560
- sklearn_added_kwarg_value_to_version_dict: Optional[Dict[str, Dict[str, str]]] = None,
1561
- sklearn_deprecated_keyword_to_version_dict: Optional[Dict[str, str]] = None,
1562
- sklearn_removed_keyword_to_version_dict: Optional[Dict[str, str]] = None,
1563
- ) -> Dict[str, Any]:
1559
+ sklearn_added_keyword_to_version_dict: Optional[dict[str, str]] = None,
1560
+ sklearn_added_kwarg_value_to_version_dict: Optional[dict[str, dict[str, str]]] = None,
1561
+ sklearn_deprecated_keyword_to_version_dict: Optional[dict[str, str]] = None,
1562
+ sklearn_removed_keyword_to_version_dict: Optional[dict[str, str]] = None,
1563
+ ) -> dict[str, Any]:
1564
1564
  """Modified snowflake.ml.framework.base.Base.get_sklearn_args with `sparse` and `sparse_output` handling."""
1565
1565
  default_sklearn_args = _utils.get_default_args(default_sklearn_obj.__class__.__init__)
1566
1566
  given_args = self.get_params()
@@ -1580,7 +1580,7 @@ class OneHotEncoder(base.BaseTransformer):
1580
1580
  if version.parse(sklearn_version) >= version.parse(_SKLEARN_DEPRECATED_KEYWORD_TO_VERSION_DICT["sparse"]):
1581
1581
  given_args["sparse_output"] = given_args.pop("sparse")
1582
1582
 
1583
- sklearn_args: Dict[str, Any] = _utils.get_filtered_valid_sklearn_args(
1583
+ sklearn_args: dict[str, Any] = _utils.get_filtered_valid_sklearn_args(
1584
1584
  args=given_args,
1585
1585
  default_sklearn_args=default_sklearn_args,
1586
1586
  sklearn_initial_keywords=sklearn_initial_keywords,