snowflake-ml-python 1.5.3__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166):
  1. snowflake/cortex/__init__.py +4 -1
  2. snowflake/cortex/_classify_text.py +36 -0
  3. snowflake/cortex/_complete.py +281 -21
  4. snowflake/cortex/_extract_answer.py +0 -1
  5. snowflake/cortex/_sentiment.py +0 -1
  6. snowflake/cortex/_summarize.py +0 -1
  7. snowflake/cortex/_translate.py +0 -1
  8. snowflake/cortex/_util.py +12 -85
  9. snowflake/ml/_internal/container_services/image_registry/http_client.py +10 -3
  10. snowflake/ml/_internal/container_services/image_registry/imagelib.py +23 -10
  11. snowflake/ml/_internal/container_services/image_registry/registry_client.py +7 -1
  12. snowflake/ml/_internal/exceptions/dataset_errors.py +7 -7
  13. snowflake/ml/_internal/exceptions/fileset_errors.py +3 -3
  14. snowflake/ml/_internal/exceptions/sql_error_codes.py +6 -0
  15. snowflake/ml/_internal/lineage/lineage_utils.py +4 -4
  16. snowflake/ml/_internal/telemetry.py +38 -2
  17. snowflake/ml/_internal/utils/identifier.py +14 -0
  18. snowflake/ml/_internal/utils/snowpark_dataframe_utils.py +15 -4
  19. snowflake/ml/data/_internal/arrow_ingestor.py +228 -0
  20. snowflake/ml/data/_internal/ingestor_utils.py +58 -0
  21. snowflake/ml/data/data_connector.py +133 -0
  22. snowflake/ml/data/data_ingestor.py +28 -0
  23. snowflake/ml/data/data_source.py +23 -0
  24. snowflake/ml/dataset/dataset.py +39 -32
  25. snowflake/ml/dataset/dataset_reader.py +18 -118
  26. snowflake/ml/feature_store/access_manager.py +7 -1
  27. snowflake/ml/feature_store/entity.py +19 -2
  28. snowflake/ml/feature_store/examples/citibike_trip_features/entities.py +20 -0
  29. snowflake/ml/feature_store/examples/citibike_trip_features/features/station_feature.py +31 -0
  30. snowflake/ml/feature_store/examples/citibike_trip_features/features/trip_feature.py +24 -0
  31. snowflake/ml/feature_store/examples/citibike_trip_features/source.yaml +4 -0
  32. snowflake/ml/feature_store/examples/example_helper.py +240 -0
  33. snowflake/ml/feature_store/examples/new_york_taxi_features/entities.py +12 -0
  34. snowflake/ml/feature_store/examples/new_york_taxi_features/features/dropoff_features.py +39 -0
  35. snowflake/ml/feature_store/examples/new_york_taxi_features/features/pickup_features.py +58 -0
  36. snowflake/ml/feature_store/examples/new_york_taxi_features/source.yaml +5 -0
  37. snowflake/ml/feature_store/examples/source_data/citibike_trips.yaml +36 -0
  38. snowflake/ml/feature_store/examples/source_data/fraud_transactions.yaml +29 -0
  39. snowflake/ml/feature_store/examples/source_data/nyc_yellow_trips.yaml +4 -0
  40. snowflake/ml/feature_store/examples/source_data/winequality_red.yaml +32 -0
  41. snowflake/ml/feature_store/examples/wine_quality_features/entities.py +14 -0
  42. snowflake/ml/feature_store/examples/wine_quality_features/features/managed_wine_features.py +29 -0
  43. snowflake/ml/feature_store/examples/wine_quality_features/features/static_wine_features.py +21 -0
  44. snowflake/ml/feature_store/examples/wine_quality_features/source.yaml +5 -0
  45. snowflake/ml/feature_store/feature_store.py +987 -264
  46. snowflake/ml/feature_store/feature_view.py +228 -13
  47. snowflake/ml/fileset/embedded_stage_fs.py +25 -21
  48. snowflake/ml/fileset/fileset.py +2 -2
  49. snowflake/ml/fileset/snowfs.py +4 -15
  50. snowflake/ml/fileset/stage_fs.py +24 -18
  51. snowflake/ml/lineage/__init__.py +3 -0
  52. snowflake/ml/lineage/lineage_node.py +139 -0
  53. snowflake/ml/model/_client/model/model_impl.py +47 -14
  54. snowflake/ml/model/_client/model/model_version_impl.py +82 -2
  55. snowflake/ml/model/_client/ops/model_ops.py +77 -5
  56. snowflake/ml/model/_client/sql/model.py +1 -0
  57. snowflake/ml/model/_client/sql/model_version.py +45 -2
  58. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +4 -6
  59. snowflake/ml/model/_model_composer/model_composer.py +15 -17
  60. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +31 -17
  61. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +2 -1
  62. snowflake/ml/model/_model_composer/model_method/function_generator.py +20 -4
  63. snowflake/ml/model/_model_composer/model_method/infer_function.py_template +3 -32
  64. snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +55 -0
  65. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +5 -34
  66. snowflake/ml/model/_model_composer/model_method/model_method.py +10 -7
  67. snowflake/ml/model/_packager/model_handlers/_base.py +13 -3
  68. snowflake/ml/model/_packager/model_handlers/_utils.py +59 -1
  69. snowflake/ml/model/_packager/model_handlers/catboost.py +44 -2
  70. snowflake/ml/model/_packager/model_handlers/custom.py +12 -4
  71. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +18 -15
  72. snowflake/ml/model/_packager/model_handlers/lightgbm.py +70 -2
  73. snowflake/ml/model/_packager/model_handlers/llm.py +2 -2
  74. snowflake/ml/model/_packager/model_handlers/mlflow.py +2 -2
  75. snowflake/ml/model/_packager/model_handlers/pytorch.py +2 -2
  76. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +2 -2
  77. snowflake/ml/model/_packager/model_handlers/sklearn.py +2 -2
  78. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +2 -2
  79. snowflake/ml/model/_packager/model_handlers/tensorflow.py +2 -2
  80. snowflake/ml/model/_packager/model_handlers/torchscript.py +2 -2
  81. snowflake/ml/model/_packager/model_handlers/xgboost.py +61 -2
  82. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  83. snowflake/ml/model/_packager/model_meta/model_blob_meta.py +2 -0
  84. snowflake/ml/model/_packager/model_meta/model_meta.py +21 -1
  85. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +6 -1
  86. snowflake/ml/model/_packager/model_packager.py +9 -4
  87. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
  88. snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -5
  89. snowflake/ml/model/custom_model.py +22 -2
  90. snowflake/ml/model/model_signature.py +4 -4
  91. snowflake/ml/model/type_hints.py +77 -4
  92. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +3 -1
  93. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +13 -1
  94. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +1 -0
  95. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +6 -0
  96. snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +1 -0
  97. snowflake/ml/modeling/cluster/affinity_propagation.py +4 -2
  98. snowflake/ml/modeling/cluster/agglomerative_clustering.py +4 -2
  99. snowflake/ml/modeling/cluster/birch.py +4 -2
  100. snowflake/ml/modeling/cluster/bisecting_k_means.py +4 -2
  101. snowflake/ml/modeling/cluster/dbscan.py +4 -2
  102. snowflake/ml/modeling/cluster/feature_agglomeration.py +4 -2
  103. snowflake/ml/modeling/cluster/k_means.py +4 -2
  104. snowflake/ml/modeling/cluster/mean_shift.py +4 -2
  105. snowflake/ml/modeling/cluster/mini_batch_k_means.py +4 -2
  106. snowflake/ml/modeling/cluster/optics.py +4 -2
  107. snowflake/ml/modeling/cluster/spectral_biclustering.py +4 -2
  108. snowflake/ml/modeling/cluster/spectral_clustering.py +4 -2
  109. snowflake/ml/modeling/cluster/spectral_coclustering.py +4 -2
  110. snowflake/ml/modeling/compose/column_transformer.py +4 -2
  111. snowflake/ml/modeling/covariance/elliptic_envelope.py +4 -2
  112. snowflake/ml/modeling/covariance/empirical_covariance.py +4 -2
  113. snowflake/ml/modeling/covariance/graphical_lasso.py +4 -2
  114. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +4 -2
  115. snowflake/ml/modeling/covariance/ledoit_wolf.py +4 -2
  116. snowflake/ml/modeling/covariance/min_cov_det.py +4 -2
  117. snowflake/ml/modeling/covariance/oas.py +4 -2
  118. snowflake/ml/modeling/covariance/shrunk_covariance.py +4 -2
  119. snowflake/ml/modeling/decomposition/dictionary_learning.py +4 -2
  120. snowflake/ml/modeling/decomposition/factor_analysis.py +4 -2
  121. snowflake/ml/modeling/decomposition/fast_ica.py +4 -2
  122. snowflake/ml/modeling/decomposition/incremental_pca.py +4 -2
  123. snowflake/ml/modeling/decomposition/kernel_pca.py +4 -2
  124. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +4 -2
  125. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +4 -2
  126. snowflake/ml/modeling/decomposition/pca.py +4 -2
  127. snowflake/ml/modeling/decomposition/sparse_pca.py +4 -2
  128. snowflake/ml/modeling/decomposition/truncated_svd.py +4 -2
  129. snowflake/ml/modeling/ensemble/isolation_forest.py +4 -2
  130. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +4 -2
  131. snowflake/ml/modeling/feature_selection/variance_threshold.py +4 -2
  132. snowflake/ml/modeling/impute/iterative_imputer.py +4 -2
  133. snowflake/ml/modeling/impute/knn_imputer.py +4 -2
  134. snowflake/ml/modeling/impute/missing_indicator.py +4 -2
  135. snowflake/ml/modeling/impute/simple_imputer.py +26 -0
  136. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +4 -2
  137. snowflake/ml/modeling/kernel_approximation/nystroem.py +4 -2
  138. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +4 -2
  139. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +4 -2
  140. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +4 -2
  141. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +4 -2
  142. snowflake/ml/modeling/manifold/isomap.py +4 -2
  143. snowflake/ml/modeling/manifold/mds.py +4 -2
  144. snowflake/ml/modeling/manifold/spectral_embedding.py +4 -2
  145. snowflake/ml/modeling/manifold/tsne.py +4 -2
  146. snowflake/ml/modeling/metrics/ranking.py +3 -0
  147. snowflake/ml/modeling/metrics/regression.py +3 -0
  148. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +4 -2
  149. snowflake/ml/modeling/mixture/gaussian_mixture.py +4 -2
  150. snowflake/ml/modeling/neighbors/kernel_density.py +4 -2
  151. snowflake/ml/modeling/neighbors/local_outlier_factor.py +4 -2
  152. snowflake/ml/modeling/neighbors/nearest_neighbors.py +4 -2
  153. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +4 -2
  154. snowflake/ml/modeling/pipeline/pipeline.py +5 -4
  155. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +43 -9
  156. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +36 -8
  157. snowflake/ml/modeling/preprocessing/polynomial_features.py +4 -2
  158. snowflake/ml/registry/_manager/model_manager.py +16 -3
  159. snowflake/ml/registry/registry.py +100 -13
  160. snowflake/ml/version.py +1 -1
  161. {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.6.0.dist-info}/METADATA +81 -7
  162. {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.6.0.dist-info}/RECORD +165 -139
  163. {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.6.0.dist-info}/WHEEL +1 -1
  164. snowflake/ml/_internal/lineage/data_source.py +0 -10
  165. {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.6.0.dist-info}/LICENSE.txt +0 -0
  166. {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.6.0.dist-info}/top_level.txt +0 -0
@@ -76,8 +76,10 @@ class ShrunkCovariance(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class DictionaryLearning(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class FactorAnalysis(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class FastICA(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class IncrementalPCA(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class KernelPCA(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class MiniBatchDictionaryLearning(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class MiniBatchSparsePCA(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class PCA(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class SparsePCA(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class TruncatedSVD(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class IsolationForest(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class SequentialFeatureSelector(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class VarianceThreshold(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -77,8 +77,10 @@ class IterativeImputer(BaseTransformer):
77
77
  initialization with the `set_input_cols` method.
78
78
 
79
79
  label_cols: Optional[Union[str, List[str]]]
80
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
81
-
80
+ A string or list of strings representing column names that contain labels.
81
+ Label columns must be specified with this parameter during initialization
82
+ or with the `set_label_cols` method before fitting.
83
+
82
84
  output_cols: Optional[Union[str, List[str]]]
83
85
  A string or list of strings representing column names that will store the
84
86
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class KNNImputer(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class MissingIndicator(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -1,5 +1,6 @@
1
1
  #!/usr/bin/env python3
2
2
  import copy
3
+ import warnings
3
4
  from typing import Any, Dict, Iterable, Optional, Type, Union
4
5
 
5
6
  import numpy as np
@@ -10,6 +11,7 @@ from sklearn import impute
10
11
  from snowflake import snowpark
11
12
  from snowflake.ml._internal import telemetry
12
13
  from snowflake.ml._internal.exceptions import error_codes, exceptions
14
+ from snowflake.ml._internal.utils import formatting
13
15
  from snowflake.ml.modeling.framework import _utils, base
14
16
  from snowflake.snowpark import functions as F, types as T
15
17
  from snowflake.snowpark._internal import utils as snowpark_utils
@@ -171,6 +173,14 @@ class SimpleImputer(base.BaseTransformer):
171
173
  self.set_output_cols(output_cols)
172
174
  self.set_passthrough_cols(passthrough_cols)
173
175
 
176
+ def _is_integer_type(self, column_type: T.DataType) -> bool:
177
+ return (
178
+ isinstance(column_type, T.ByteType)
179
+ or isinstance(column_type, T.ShortType)
180
+ or isinstance(column_type, T.IntegerType)
181
+ or isinstance(column_type, T.LongType)
182
+ )
183
+
174
184
  def _reset(self) -> None:
175
185
  """
176
186
  Reset internal data-dependent state of the imputer, if necessary.
@@ -389,6 +399,22 @@ class SimpleImputer(base.BaseTransformer):
389
399
  # Use `fillna` for replacing nans. Check if the column has a string data type, or coerce a float.
390
400
  if not isinstance(input_col_datatypes[input_col], T.StringType):
391
401
  statistic = float(statistic)
402
+
403
+ if self._is_integer_type(input_col_datatypes[input_col]):
404
+ if statistic.is_integer():
405
+ statistic = int(statistic)
406
+ else:
407
+ warnings.warn(
408
+ formatting.unwrap(
409
+ f"""
410
+ Integer column may not be imputed with a non-integer value {statistic}.
411
+ In order to impute a non-integer value, convert the column to FloatType before imputing.
412
+ """
413
+ ),
414
+ category=UserWarning,
415
+ stacklevel=1,
416
+ )
417
+
392
418
  transformed_dataset = transformed_dataset.na.fill({output_col: statistic})
393
419
  else:
394
420
  transformed_dataset = transformed_dataset.na.replace(
@@ -76,8 +76,10 @@ class AdditiveChi2Sampler(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class Nystroem(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class PolynomialCountSketch(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class RBFSampler(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class SkewedChi2Sampler(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class SGDOneClassSVM(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class Isomap(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class MDS(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class SpectralEmbedding(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class TSNE(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -102,6 +102,7 @@ def precision_recall_curve(
102
102
  ],
103
103
  statement_params=statement_params,
104
104
  anonymous=True,
105
+ execute_as="caller",
105
106
  )
106
107
  def precision_recall_curve_anon_sproc(session: snowpark.Session) -> bytes:
107
108
  for query in queries[:-1]:
@@ -249,6 +250,7 @@ def roc_auc_score(
249
250
  ],
250
251
  statement_params=statement_params,
251
252
  anonymous=True,
253
+ execute_as="caller",
252
254
  )
253
255
  def roc_auc_score_anon_sproc(session: snowpark.Session) -> bytes:
254
256
  for query in queries[:-1]:
@@ -352,6 +354,7 @@ def roc_curve(
352
354
  ],
353
355
  statement_params=statement_params,
354
356
  anonymous=True,
357
+ execute_as="caller",
355
358
  )
356
359
  def roc_curve_anon_sproc(session: snowpark.Session) -> bytes:
357
360
  for query in queries[:-1]:
@@ -87,6 +87,7 @@ def d2_absolute_error_score(
87
87
  ],
88
88
  statement_params=statement_params,
89
89
  anonymous=True,
90
+ execute_as="caller",
90
91
  )
91
92
  def d2_absolute_error_score_anon_sproc(session: snowpark.Session) -> bytes:
92
93
  for query in queries[:-1]:
@@ -184,6 +185,7 @@ def d2_pinball_score(
184
185
  ],
185
186
  statement_params=statement_params,
186
187
  anonymous=True,
188
+ execute_as="caller",
187
189
  )
188
190
  def d2_pinball_score_anon_sproc(session: snowpark.Session) -> bytes:
189
191
  for query in queries[:-1]:
@@ -299,6 +301,7 @@ def explained_variance_score(
299
301
  ],
300
302
  statement_params=statement_params,
301
303
  anonymous=True,
304
+ execute_as="caller",
302
305
  )
303
306
  def explained_variance_score_anon_sproc(session: snowpark.Session) -> bytes:
304
307
  for query in queries[:-1]:
@@ -76,8 +76,10 @@ class BayesianGaussianMixture(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class GaussianMixture(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class KernelDensity(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class LocalOutlierFactor(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class NearestNeighbors(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class BernoulliRBM(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -99,10 +99,6 @@ class Pipeline(base.BaseTransformer):
99
99
  must implement `fit` and `transform` methods.
100
100
  The final step can be a transform or estimator, that is, it must implement
101
101
  `fit` and `transform`/`predict` methods.
102
- TODO: SKLearn pipeline expects last step(and only the last step) to be an estimator obj or a dummy
103
- estimator(like None or passthrough). Currently this Pipeline class works with a list of all
104
- transforms or a list of transforms ending with an estimator. Should we change this implementation
105
- to only work with list of steps ending with an estimator or a dummy estimator like SKLearn?
106
102
 
107
103
  Args:
108
104
  steps: List of (name, transform) tuples (implementing `fit`/`transform`) that
@@ -111,6 +107,10 @@ class Pipeline(base.BaseTransformer):
111
107
  """
112
108
  super().__init__()
113
109
  self.steps = steps
110
+ # TODO(snandamuri): SKLearn pipeline expects last step(and only the last step) to be an estimator obj or a dummy
111
+ # estimator(like None or passthrough). Currently this Pipeline class works with a list of all
112
+ # transforms or a list of transforms ending with an estimator. Should we change this implementation
113
+ # to only work with list of steps ending with an estimator or a dummy estimator like SKLearn?
114
114
  self._is_final_step_estimator = Pipeline._is_estimator(steps[-1][1])
115
115
  self._is_fitted = False
116
116
  self._feature_names_in: List[np.ndarray[Any, np.dtype[Any]]] = []
@@ -378,6 +378,7 @@ class Pipeline(base.BaseTransformer):
378
378
  anonymous=True,
379
379
  imports=imports, # type: ignore[arg-type]
380
380
  statement_params=sproc_statement_params,
381
+ execute_as="caller",
381
382
  )
382
383
 
383
384
  sproc_export_file_name: str = pipeline_within_one_sproc(