PyPI - snowflake-ml-python - Versions diffs - 1.6.1__py3-none-any.whl → 1.6.3__py3-none-any.whl - Mend

snowflake-ml-python 1.6.1-py3-none-any.whl → 1.6.3-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (284) hide show

snowflake/cortex/__init__.py +4 -0
snowflake/cortex/_classify_text.py +2 -2
snowflake/cortex/_embed_text_1024.py +37 -0
snowflake/cortex/_embed_text_768.py +37 -0
snowflake/cortex/_extract_answer.py +2 -2
snowflake/cortex/_sentiment.py +2 -2
snowflake/cortex/_summarize.py +2 -2
snowflake/cortex/_translate.py +2 -2
snowflake/cortex/_util.py +4 -4
snowflake/ml/_internal/env_utils.py +5 -5
snowflake/ml/_internal/exceptions/error_codes.py +2 -0
snowflake/ml/_internal/telemetry.py +142 -20
snowflake/ml/_internal/utils/db_utils.py +50 -0
snowflake/ml/_internal/utils/identifier.py +48 -11
snowflake/ml/_internal/utils/service_logger.py +63 -0
snowflake/ml/_internal/utils/snowflake_env.py +23 -13
snowflake/ml/_internal/utils/sql_identifier.py +26 -2
snowflake/ml/_internal/utils/table_manager.py +19 -1
snowflake/ml/data/_internal/arrow_ingestor.py +1 -11
snowflake/ml/data/data_connector.py +33 -7
snowflake/ml/data/ingestor_utils.py +20 -10
snowflake/ml/data/torch_utils.py +68 -0
snowflake/ml/dataset/dataset.py +1 -3
snowflake/ml/feature_store/access_manager.py +3 -3
snowflake/ml/feature_store/feature_store.py +60 -19
snowflake/ml/feature_store/feature_view.py +84 -30
snowflake/ml/fileset/embedded_stage_fs.py +1 -1
snowflake/ml/fileset/fileset.py +1 -1
snowflake/ml/fileset/sfcfs.py +9 -3
snowflake/ml/fileset/stage_fs.py +2 -1
snowflake/ml/lineage/lineage_node.py +7 -2
snowflake/ml/model/__init__.py +1 -2
snowflake/ml/model/_client/model/model_version_impl.py +96 -12
snowflake/ml/model/_client/ops/model_ops.py +124 -6
snowflake/ml/model/_client/ops/service_ops.py +309 -9
snowflake/ml/model/_client/service/model_deployment_spec.py +8 -5
snowflake/ml/model/_client/service/model_deployment_spec_schema.py +2 -2
snowflake/ml/model/_client/sql/_base.py +5 -0
snowflake/ml/model/_client/sql/model.py +1 -0
snowflake/ml/model/_client/sql/model_version.py +9 -5
snowflake/ml/model/_client/sql/service.py +121 -20
snowflake/ml/model/_model_composer/model_composer.py +11 -39
snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +31 -11
snowflake/ml/model/_packager/model_env/model_env.py +4 -38
snowflake/ml/model/_packager/model_handlers/_utils.py +134 -28
snowflake/ml/model/_packager/model_handlers/catboost.py +31 -30
snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +26 -18
snowflake/ml/model/_packager/model_handlers/lightgbm.py +31 -58
snowflake/ml/model/_packager/model_handlers/mlflow.py +3 -5
snowflake/ml/model/_packager/model_handlers/model_objective_utils.py +169 -0
snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +15 -8
snowflake/ml/model/_packager/model_handlers/sklearn.py +56 -60
snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +141 -9
snowflake/ml/model/_packager/model_handlers/torchscript.py +2 -2
snowflake/ml/model/_packager/model_handlers/xgboost.py +63 -48
snowflake/ml/model/_packager/model_meta/model_meta.py +16 -42
snowflake/ml/model/_packager/model_meta/model_meta_schema.py +1 -14
snowflake/ml/model/_packager/model_packager.py +14 -8
snowflake/ml/model/_packager/model_runtime/model_runtime.py +11 -0
snowflake/ml/model/_signatures/pytorch_handler.py +1 -1
snowflake/ml/model/_signatures/snowpark_handler.py +3 -2
snowflake/ml/model/_signatures/utils.py +9 -0
snowflake/ml/model/type_hints.py +12 -145
snowflake/ml/modeling/_internal/constants.py +1 -0
snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +5 -5
snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +9 -6
snowflake/ml/modeling/_internal/model_specifications.py +2 -0
snowflake/ml/modeling/_internal/model_trainer.py +1 -0
snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +2 -4
snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +5 -5
snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +130 -166
snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +0 -1
snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +61 -21
snowflake/ml/modeling/cluster/affinity_propagation.py +61 -21
snowflake/ml/modeling/cluster/agglomerative_clustering.py +61 -21
snowflake/ml/modeling/cluster/birch.py +61 -21
snowflake/ml/modeling/cluster/bisecting_k_means.py +61 -21
snowflake/ml/modeling/cluster/dbscan.py +61 -21
snowflake/ml/modeling/cluster/feature_agglomeration.py +61 -21
snowflake/ml/modeling/cluster/k_means.py +61 -21
snowflake/ml/modeling/cluster/mean_shift.py +61 -21
snowflake/ml/modeling/cluster/mini_batch_k_means.py +61 -21
snowflake/ml/modeling/cluster/optics.py +61 -21
snowflake/ml/modeling/cluster/spectral_biclustering.py +61 -21
snowflake/ml/modeling/cluster/spectral_clustering.py +61 -21
snowflake/ml/modeling/cluster/spectral_coclustering.py +61 -21
snowflake/ml/modeling/compose/column_transformer.py +61 -21
snowflake/ml/modeling/compose/transformed_target_regressor.py +61 -21
snowflake/ml/modeling/covariance/elliptic_envelope.py +61 -21
snowflake/ml/modeling/covariance/empirical_covariance.py +61 -21
snowflake/ml/modeling/covariance/graphical_lasso.py +61 -21
snowflake/ml/modeling/covariance/graphical_lasso_cv.py +61 -21
snowflake/ml/modeling/covariance/ledoit_wolf.py +61 -21
snowflake/ml/modeling/covariance/min_cov_det.py +61 -21
snowflake/ml/modeling/covariance/oas.py +61 -21
snowflake/ml/modeling/covariance/shrunk_covariance.py +61 -21
snowflake/ml/modeling/decomposition/dictionary_learning.py +61 -21
snowflake/ml/modeling/decomposition/factor_analysis.py +61 -21
snowflake/ml/modeling/decomposition/fast_ica.py +61 -21
snowflake/ml/modeling/decomposition/incremental_pca.py +61 -21
snowflake/ml/modeling/decomposition/kernel_pca.py +61 -21
snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +61 -21
snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +61 -21
snowflake/ml/modeling/decomposition/pca.py +61 -21
snowflake/ml/modeling/decomposition/sparse_pca.py +61 -21
snowflake/ml/modeling/decomposition/truncated_svd.py +61 -21
snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +61 -21
snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +61 -21
snowflake/ml/modeling/ensemble/ada_boost_classifier.py +61 -21
snowflake/ml/modeling/ensemble/ada_boost_regressor.py +61 -21
snowflake/ml/modeling/ensemble/bagging_classifier.py +61 -21
snowflake/ml/modeling/ensemble/bagging_regressor.py +61 -21
snowflake/ml/modeling/ensemble/extra_trees_classifier.py +61 -21
snowflake/ml/modeling/ensemble/extra_trees_regressor.py +61 -21
snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +61 -21
snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +61 -21
snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +61 -21
snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +61 -21
snowflake/ml/modeling/ensemble/isolation_forest.py +61 -21
snowflake/ml/modeling/ensemble/random_forest_classifier.py +61 -21
snowflake/ml/modeling/ensemble/random_forest_regressor.py +61 -21
snowflake/ml/modeling/ensemble/stacking_regressor.py +61 -21
snowflake/ml/modeling/ensemble/voting_classifier.py +61 -21
snowflake/ml/modeling/ensemble/voting_regressor.py +61 -21
snowflake/ml/modeling/feature_selection/generic_univariate_select.py +61 -21
snowflake/ml/modeling/feature_selection/select_fdr.py +61 -21
snowflake/ml/modeling/feature_selection/select_fpr.py +61 -21
snowflake/ml/modeling/feature_selection/select_fwe.py +61 -21
snowflake/ml/modeling/feature_selection/select_k_best.py +61 -21
snowflake/ml/modeling/feature_selection/select_percentile.py +61 -21
snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +61 -21
snowflake/ml/modeling/feature_selection/variance_threshold.py +61 -21
snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +61 -21
snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +61 -21
snowflake/ml/modeling/impute/iterative_imputer.py +61 -21
snowflake/ml/modeling/impute/knn_imputer.py +61 -21
snowflake/ml/modeling/impute/missing_indicator.py +61 -21
snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +61 -21
snowflake/ml/modeling/kernel_approximation/nystroem.py +61 -21
snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +61 -21
snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +61 -21
snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +61 -21
snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +61 -21
snowflake/ml/modeling/lightgbm/lgbm_classifier.py +61 -21
snowflake/ml/modeling/lightgbm/lgbm_regressor.py +61 -21
snowflake/ml/modeling/linear_model/ard_regression.py +61 -21
snowflake/ml/modeling/linear_model/bayesian_ridge.py +61 -21
snowflake/ml/modeling/linear_model/elastic_net.py +61 -21
snowflake/ml/modeling/linear_model/elastic_net_cv.py +61 -21
snowflake/ml/modeling/linear_model/gamma_regressor.py +61 -21
snowflake/ml/modeling/linear_model/huber_regressor.py +61 -21
snowflake/ml/modeling/linear_model/lars.py +61 -21
snowflake/ml/modeling/linear_model/lars_cv.py +61 -21
snowflake/ml/modeling/linear_model/lasso.py +61 -21
snowflake/ml/modeling/linear_model/lasso_cv.py +61 -21
snowflake/ml/modeling/linear_model/lasso_lars.py +61 -21
snowflake/ml/modeling/linear_model/lasso_lars_cv.py +61 -21
snowflake/ml/modeling/linear_model/lasso_lars_ic.py +61 -21
snowflake/ml/modeling/linear_model/linear_regression.py +61 -21
snowflake/ml/modeling/linear_model/logistic_regression.py +61 -21
snowflake/ml/modeling/linear_model/logistic_regression_cv.py +61 -21
snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +61 -21
snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +61 -21
snowflake/ml/modeling/linear_model/multi_task_lasso.py +61 -21
snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +61 -21
snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +61 -21
snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +61 -21
snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +61 -21
snowflake/ml/modeling/linear_model/perceptron.py +61 -21
snowflake/ml/modeling/linear_model/poisson_regressor.py +61 -21
snowflake/ml/modeling/linear_model/ransac_regressor.py +61 -21
snowflake/ml/modeling/linear_model/ridge.py +61 -21
snowflake/ml/modeling/linear_model/ridge_classifier.py +61 -21
snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +61 -21
snowflake/ml/modeling/linear_model/ridge_cv.py +61 -21
snowflake/ml/modeling/linear_model/sgd_classifier.py +61 -21
snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +61 -21
snowflake/ml/modeling/linear_model/sgd_regressor.py +61 -21
snowflake/ml/modeling/linear_model/theil_sen_regressor.py +61 -21
snowflake/ml/modeling/linear_model/tweedie_regressor.py +61 -21
snowflake/ml/modeling/manifold/isomap.py +61 -21
snowflake/ml/modeling/manifold/mds.py +61 -21
snowflake/ml/modeling/manifold/spectral_embedding.py +61 -21
snowflake/ml/modeling/manifold/tsne.py +61 -21
snowflake/ml/modeling/metrics/metrics_utils.py +2 -2
snowflake/ml/modeling/metrics/ranking.py +0 -3
snowflake/ml/modeling/metrics/regression.py +0 -3
snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +61 -21
snowflake/ml/modeling/mixture/gaussian_mixture.py +61 -21
snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +61 -21
snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +61 -21
snowflake/ml/modeling/multiclass/output_code_classifier.py +61 -21
snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +61 -21
snowflake/ml/modeling/naive_bayes/categorical_nb.py +61 -21
snowflake/ml/modeling/naive_bayes/complement_nb.py +61 -21
snowflake/ml/modeling/naive_bayes/gaussian_nb.py +61 -21
snowflake/ml/modeling/naive_bayes/multinomial_nb.py +61 -21
snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +61 -21
snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +61 -21
snowflake/ml/modeling/neighbors/kernel_density.py +61 -21
snowflake/ml/modeling/neighbors/local_outlier_factor.py +61 -21
snowflake/ml/modeling/neighbors/nearest_centroid.py +61 -21
snowflake/ml/modeling/neighbors/nearest_neighbors.py +61 -21
snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +61 -21
snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +61 -21
snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +61 -21
snowflake/ml/modeling/neural_network/bernoulli_rbm.py +61 -21
snowflake/ml/modeling/neural_network/mlp_classifier.py +61 -21
snowflake/ml/modeling/neural_network/mlp_regressor.py +61 -21
snowflake/ml/modeling/parameters/disable_model_tracer.py +5 -0
snowflake/ml/modeling/pipeline/pipeline.py +1 -13
snowflake/ml/modeling/preprocessing/polynomial_features.py +61 -21
snowflake/ml/modeling/semi_supervised/label_propagation.py +61 -21
snowflake/ml/modeling/semi_supervised/label_spreading.py +61 -21
snowflake/ml/modeling/svm/linear_svc.py +61 -21
snowflake/ml/modeling/svm/linear_svr.py +61 -21
snowflake/ml/modeling/svm/nu_svc.py +61 -21
snowflake/ml/modeling/svm/nu_svr.py +61 -21
snowflake/ml/modeling/svm/svc.py +61 -21
snowflake/ml/modeling/svm/svr.py +61 -21
snowflake/ml/modeling/tree/decision_tree_classifier.py +61 -21
snowflake/ml/modeling/tree/decision_tree_regressor.py +61 -21
snowflake/ml/modeling/tree/extra_tree_classifier.py +61 -21
snowflake/ml/modeling/tree/extra_tree_regressor.py +61 -21
snowflake/ml/modeling/xgboost/xgb_classifier.py +64 -23
snowflake/ml/modeling/xgboost/xgb_regressor.py +64 -23
snowflake/ml/modeling/xgboost/xgbrf_classifier.py +64 -23
snowflake/ml/modeling/xgboost/xgbrf_regressor.py +64 -23
snowflake/ml/monitoring/_client/model_monitor.py +126 -0
snowflake/ml/monitoring/_client/model_monitor_manager.py +361 -0
snowflake/ml/monitoring/_client/model_monitor_version.py +1 -0
snowflake/ml/monitoring/_client/monitor_sql_client.py +1335 -0
snowflake/ml/monitoring/_client/queries/record_count.ssql +14 -0
snowflake/ml/monitoring/_client/queries/rmse.ssql +28 -0
snowflake/ml/monitoring/entities/model_monitor_config.py +28 -0
snowflake/ml/monitoring/entities/model_monitor_interval.py +46 -0
snowflake/ml/monitoring/entities/output_score_type.py +90 -0
snowflake/ml/registry/_manager/model_manager.py +4 -0
snowflake/ml/registry/registry.py +166 -8
snowflake/ml/version.py +1 -1
{snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.3.dist-info}/METADATA +43 -9
snowflake_ml_python-1.6.3.dist-info/RECORD +400 -0
{snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.3.dist-info}/WHEEL +1 -1
snowflake/ml/_internal/container_services/image_registry/credential.py +0 -84
snowflake/ml/_internal/container_services/image_registry/http_client.py +0 -127
snowflake/ml/_internal/container_services/image_registry/imagelib.py +0 -400
snowflake/ml/_internal/container_services/image_registry/registry_client.py +0 -212
snowflake/ml/_internal/utils/log_stream_processor.py +0 -30
snowflake/ml/_internal/utils/session_token_manager.py +0 -46
snowflake/ml/_internal/utils/spcs_attribution_utils.py +0 -122
snowflake/ml/_internal/utils/uri.py +0 -77
snowflake/ml/data/torch_dataset.py +0 -33
snowflake/ml/model/_api.py +0 -568
snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +0 -12
snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +0 -249
snowflake/ml/model/_deploy_client/image_builds/docker_context.py +0 -130
snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +0 -36
snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +0 -268
snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +0 -215
snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +0 -53
snowflake/ml/model/_deploy_client/image_builds/templates/image_build_job_spec_template +0 -38
snowflake/ml/model/_deploy_client/image_builds/templates/kaniko_shell_script_template +0 -105
snowflake/ml/model/_deploy_client/snowservice/deploy.py +0 -611
snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +0 -116
snowflake/ml/model/_deploy_client/snowservice/instance_types.py +0 -10
snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +0 -28
snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template_with_model +0 -21
snowflake/ml/model/_deploy_client/utils/constants.py +0 -48
snowflake/ml/model/_deploy_client/utils/snowservice_client.py +0 -280
snowflake/ml/model/_deploy_client/warehouse/deploy.py +0 -202
snowflake/ml/model/_deploy_client/warehouse/infer_template.py +0 -99
snowflake/ml/model/_packager/model_handlers/llm.py +0 -267
snowflake/ml/model/_packager/model_meta/_core_requirements.py +0 -11
snowflake/ml/model/deploy_platforms.py +0 -6
snowflake/ml/model/models/llm.py +0 -104
snowflake/ml/monitoring/monitor.py +0 -203
snowflake/ml/registry/_initial_schema.py +0 -142
snowflake/ml/registry/_schema.py +0 -82
snowflake/ml/registry/_schema_upgrade_plans.py +0 -116
snowflake/ml/registry/_schema_version_manager.py +0 -163
snowflake/ml/registry/model_registry.py +0 -2048
snowflake_ml_python-1.6.1.dist-info/RECORD +0 -422
{snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.3.dist-info}/LICENSE.txt +0 -0
{snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.3.dist-info}/top_level.txt +0 -0

snowflake/ml/modeling/_internal/model_trainer.py CHANGED Viewed

@@ -20,6 +20,7 @@ class ModelTrainer(Protocol):
         self,
         expected_output_cols_list: List[str],
         drop_input_cols: Optional[bool] = False,
+        example_output_pd_df: Optional[pd.DataFrame] = None,
     ) -> Tuple[Union[DataFrame, pd.DataFrame], object]:
         raise NotImplementedError

snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py CHANGED Viewed

@@ -377,7 +377,6 @@ class DistributedHPOTrainer(SnowparkModelTrainer):
             anonymous=True,
             imports=imports,  # type: ignore[arg-type]
             statement_params=sproc_statement_params,
-            execute_as="caller",
         )
         def _distributed_search(
             session: Session,
@@ -495,7 +494,7 @@ class DistributedHPOTrainer(SnowparkModelTrainer):
                     label_arg_name = "Y" if "Y" in argspec.args else "y"
                     args[label_arg_name] = df[label_cols].squeeze()
-                if sample_weight_col is not None and "sample_weight" in argspec.args:
+                if sample_weight_col is not None:
                     args["sample_weight"] = df[sample_weight_col].squeeze()
                 return args, estimator, indices, len(df), params_to_evaluate
@@ -783,7 +782,6 @@ class DistributedHPOTrainer(SnowparkModelTrainer):
             anonymous=True,
             imports=imports,  # type: ignore[arg-type]
             statement_params=sproc_statement_params,
-            execute_as="caller",
         )
         def _distributed_search(
             session: Session,
@@ -1061,7 +1059,7 @@ class DistributedHPOTrainer(SnowparkModelTrainer):
                         if label_cols:
                             label_arg_name = "Y" if "Y" in argspec.args else "y"
                             args[label_arg_name] = y
-                        if sample_weight_col is not None and "sample_weight" in argspec.args:
+                        if sample_weight_col is not None:
                             args["sample_weight"] = df[sample_weight_col].squeeze()
                         # estimator.refit = original_refit
                         refit_start_time = time.time()

snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py CHANGED Viewed

@@ -318,19 +318,19 @@ class SnowparkTransformHandlers:
             with open(local_score_file_name_path, mode="r+b") as local_score_file_obj:
                 estimator = cp.load(local_score_file_obj)
-            argspec = inspect.getfullargspec(estimator.score)
-            if "X" in argspec.args:
+            params = inspect.signature(estimator.score).parameters
+            if "X" in params:
                 args = {"X": df[input_cols]}
-            elif "X_test" in argspec.args:
+            elif "X_test" in params:
                 args = {"X_test": df[input_cols]}
             else:
                 raise RuntimeError("Neither 'X' or 'X_test' exist in argument")
             if label_cols:
-                label_arg_name = "Y" if "Y" in argspec.args else "y"
+                label_arg_name = "Y" if "Y" in params else "y"
                 args[label_arg_name] = df[label_cols].squeeze()
-            if sample_weight_col is not None and "sample_weight" in argspec.args:
+            if sample_weight_col is not None and "sample_weight" in params:
                 args["sample_weight"] = df[sample_weight_col].squeeze()
             result: float = estimator.score(**args)

snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py CHANGED Viewed

@@ -35,6 +35,7 @@ cp.register_pickle_by_value(inspect.getmodule(handle_inference_result))
 _PROJECT = "ModelDevelopment"
 _ENABLE_ANONYMOUS_SPROC = False
+_ENABLE_TRACER = True
 class SnowparkModelTrainer:
@@ -119,6 +120,8 @@ class SnowparkModelTrainer:
             A callable that can be registered as a stored procedure.
         """
         imports = model_spec.imports  # In order for the sproc to not resolve this reference in snowflake.ml
+        method_name = "fit"
+        tracer_name = f"snowpark.ml.modeling.{self._class_name.lower()}.{method_name}"
         def fit_wrapper_function(
             session: Session,
@@ -138,110 +141,97 @@ class SnowparkModelTrainer:
             for import_name in imports:
                 importlib.import_module(import_name)
-            # Execute snowpark queries and obtain the results as pandas dataframe
-            # NB: this implies that the result data must fit into memory.
-            for query in sql_queries[:-1]:
-                _ = session.sql(query).collect(statement_params=statement_params)
-            sp_df = session.sql(sql_queries[-1])
-            df: pd.DataFrame = sp_df.to_pandas(statement_params=statement_params)
-            df.columns = sp_df.columns
+            def fit_and_return_estimator() -> str:
+                """This is a helper function within the sproc to download the data, fit the model, and upload the model.
+                Returns:
+                    The name of the file in session's temp stage (temp_stage_name) that contains the serialized model.
+                """
+                # Execute snowpark queries and obtain the results as pandas dataframe
+                # NB: this implies that the result data must fit into memory.
+                for query in sql_queries[:-1]:
+                    _ = session.sql(query).collect(statement_params=statement_params)
+                sp_df = session.sql(sql_queries[-1])
+                df: pd.DataFrame = sp_df.to_pandas(statement_params=statement_params)
+                df.columns = sp_df.columns
+                local_transform_file_name = temp_file_utils.get_temp_file_path()
+                session.file.get(
+                    stage_location=temp_stage_name,
+                    target_directory=local_transform_file_name,
+                    statement_params=statement_params,
+                )
-            local_transform_file_name = temp_file_utils.get_temp_file_path()
+                local_transform_file_path = os.path.join(
+                    local_transform_file_name, os.listdir(local_transform_file_name)[0]
+                )
+                with open(local_transform_file_path, mode="r+b") as local_transform_file_obj:
+                    estimator = cp.load(local_transform_file_obj)
-            session.file.get(
-                stage_location=temp_stage_name,
-                target_directory=local_transform_file_name,
-                statement_params=statement_params,
-            )
+                params = inspect.signature(estimator.fit).parameters
+                args = {"X": df[input_cols]}
+                if label_cols:
+                    label_arg_name = "Y" if "Y" in params else "y"
+                    args[label_arg_name] = df[label_cols].squeeze()
-            local_transform_file_path = os.path.join(
-                local_transform_file_name, os.listdir(local_transform_file_name)[0]
-            )
-            with open(local_transform_file_path, mode="r+b") as local_transform_file_obj:
-                estimator = cp.load(local_transform_file_obj)
+                if sample_weight_col is not None and "sample_weight" in params:
+                    args["sample_weight"] = df[sample_weight_col].squeeze()
-            argspec = inspect.getfullargspec(estimator.fit)
-            args = {"X": df[input_cols]}
-            if label_cols:
-                label_arg_name = "Y" if "Y" in argspec.args else "y"
-                args[label_arg_name] = df[label_cols].squeeze()
+                estimator.fit(**args)
-            if sample_weight_col is not None and "sample_weight" in argspec.args:
-                args["sample_weight"] = df[sample_weight_col].squeeze()
+                local_result_file_name = temp_file_utils.get_temp_file_path()
-            estimator.fit(**args)
+                with open(local_result_file_name, mode="w+b") as local_result_file_obj:
+                    cp.dump(estimator, local_result_file_obj)
-            local_result_file_name = temp_file_utils.get_temp_file_path()
+                session.file.put(
+                    local_file_name=local_result_file_name,
+                    stage_location=temp_stage_name,
+                    auto_compress=False,
+                    overwrite=True,
+                    statement_params=statement_params,
+                )
+                return local_result_file_name
-            with open(local_result_file_name, mode="w+b") as local_result_file_obj:
-                cp.dump(estimator, local_result_file_obj)
+            if _ENABLE_TRACER:
-            session.file.put(
-                local_file_name=local_result_file_name,
-                stage_location=temp_stage_name,
-                auto_compress=False,
-                overwrite=True,
-                statement_params=statement_params,
-            )
+                # Use opentelemetry to trace the dist and span of the fit operation.
+                # This would allow user to see the trace in the Snowflake UI.
+                from opentelemetry import trace
-            # Note: you can add something like  + "|" + str(df) to the return string
-            # to pass debug information to the caller.
-            return str(os.path.basename(local_result_file_name))
+                tracer = trace.get_tracer(tracer_name)
+                with tracer.start_as_current_span("fit"):
+                    local_result_file_name = fit_and_return_estimator()
+                    # Note: you can add something like  + "|" + str(df) to the return string
+                    # to pass debug information to the caller.
+                    return str(os.path.basename(local_result_file_name))
+            else:
+                local_result_file_name = fit_and_return_estimator()
+                return str(os.path.basename(local_result_file_name))
         return fit_wrapper_function
-    def _get_fit_wrapper_sproc_anonymous(self, statement_params: Dict[str, str]) -> StoredProcedure:
+    def _get_fit_wrapper_sproc(self, statement_params: Dict[str, str], anonymous: bool) -> StoredProcedure:
         model_spec = ModelSpecificationsBuilder.build(model=self.estimator)
-        fit_sproc_name = snowpark_utils.random_name_for_temp_object(snowpark_utils.TempObjectType.PROCEDURE)
-        relaxed_dependencies = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
-            pkg_versions=model_spec.pkgDependencies, session=self.session
-        )
-        fit_wrapper_sproc = self.session.sproc.register(
-            func=self._build_fit_wrapper_sproc(model_spec=model_spec),
-            is_permanent=False,
-            name=fit_sproc_name,
-            packages=["snowflake-snowpark-python"] + relaxed_dependencies,  # type: ignore[arg-type]
-            replace=True,
-            session=self.session,
-            statement_params=statement_params,
-            anonymous=True,
-            execute_as="caller",
-        )
-        return fit_wrapper_sproc
-    def _get_fit_wrapper_sproc(self, statement_params: Dict[str, str]) -> StoredProcedure:
-        # If the sproc already exists, don't register.
-        if not hasattr(self.session, "_FIT_WRAPPER_SPROCS"):
-            self.session._FIT_WRAPPER_SPROCS: Dict[str, StoredProcedure] = {}  # type: ignore[attr-defined, misc]
-        model_spec = ModelSpecificationsBuilder.build(model=self.estimator)
-        fit_sproc_key = model_spec.__class__.__name__
-        if fit_sproc_key in self.session._FIT_WRAPPER_SPROCS:  # type: ignore[attr-defined]
-            fit_sproc: StoredProcedure = self.session._FIT_WRAPPER_SPROCS[fit_sproc_key]  # type: ignore[attr-defined]
-            return fit_sproc
         fit_sproc_name = snowpark_utils.random_name_for_temp_object(snowpark_utils.TempObjectType.PROCEDURE)
         relaxed_dependencies = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
             pkg_versions=model_spec.pkgDependencies, session=self.session
         )
+        packages = ["snowflake-snowpark-python", "snowflake-telemetry-python"] + relaxed_dependencies
         fit_wrapper_sproc = self.session.sproc.register(
             func=self._build_fit_wrapper_sproc(model_spec=model_spec),
             is_permanent=False,
             name=fit_sproc_name,
-            packages=["snowflake-snowpark-python"] + relaxed_dependencies,  # type: ignore[arg-type]
+            packages=packages,  # type: ignore[arg-type]
             replace=True,
             session=self.session,
             statement_params=statement_params,
-            execute_as="caller",
+            anonymous=anonymous,
         )
-        self.session._FIT_WRAPPER_SPROCS[fit_sproc_key] = fit_wrapper_sproc  # type: ignore[attr-defined]
         return fit_wrapper_sproc
     def _build_fit_predict_wrapper_sproc(
@@ -333,7 +323,9 @@ class SnowparkModelTrainer:
             # write into a temp table in sproc and load the table from outside
             session.write_pandas(
-                fit_predict_result_pd, fit_predict_result_name, auto_create_table=True, table_type="temp"
+                fit_predict_result_pd,
+                fit_predict_result_name,
+                overwrite=True,
             )
             # Note: you can add something like  + "|" + str(df) to the return string
@@ -414,13 +406,13 @@ class SnowparkModelTrainer:
             with open(local_transform_file_path, mode="r+b") as local_transform_file_obj:
                 estimator = cp.load(local_transform_file_obj)
-            argspec = inspect.getfullargspec(estimator.fit)
+            params = inspect.signature(estimator.fit).parameters
             args = {"X": df[input_cols]}
             if label_cols:
-                label_arg_name = "Y" if "Y" in argspec.args else "y"
+                label_arg_name = "Y" if "Y" in params else "y"
                 args[label_arg_name] = df[label_cols].squeeze()
-            if sample_weight_col is not None and "sample_weight" in argspec.args:
+            if sample_weight_col is not None and "sample_weight" in params:
                 args["sample_weight"] = df[sample_weight_col].squeeze()
             fit_transform_result = estimator.fit_transform(**args)
@@ -468,16 +460,14 @@ class SnowparkModelTrainer:
             session.write_pandas(
                 transformed_pandas_df,
                 fit_transform_result_name,
-                auto_create_table=True,
-                table_type="temp",
-                quote_identifiers=False,
+                overwrite=True,
             )
             return str(os.path.basename(local_result_file_name))
         return fit_transform_wrapper_function
-    def _get_fit_predict_wrapper_sproc_anonymous(self, statement_params: Dict[str, str]) -> StoredProcedure:
+    def _get_fit_predict_wrapper_sproc(self, statement_params: Dict[str, str], anonymous: bool) -> StoredProcedure:
         model_spec = ModelSpecificationsBuilder.build(model=self.estimator)
         fit_predict_sproc_name = snowpark_utils.random_name_for_temp_object(snowpark_utils.TempObjectType.PROCEDURE)
@@ -494,49 +484,12 @@ class SnowparkModelTrainer:
             replace=True,
             session=self.session,
             statement_params=statement_params,
-            anonymous=True,
-            execute_as="caller",
+            anonymous=anonymous,
         )
         return fit_predict_wrapper_sproc
-    def _get_fit_predict_wrapper_sproc(self, statement_params: Dict[str, str]) -> StoredProcedure:
-        # If the sproc already exists, don't register.
-        if not hasattr(self.session, "_FIT_WRAPPER_SPROCS"):
-            self.session._FIT_WRAPPER_SPROCS: Dict[str, StoredProcedure] = {}  # type: ignore[attr-defined, misc]
-        model_spec = ModelSpecificationsBuilder.build(model=self.estimator)
-        fit_predict_sproc_key = model_spec.__class__.__name__ + "_fit_predict"
-        if fit_predict_sproc_key in self.session._FIT_WRAPPER_SPROCS:  # type: ignore[attr-defined]
-            fit_sproc: StoredProcedure = self.session._FIT_WRAPPER_SPROCS[  # type: ignore[attr-defined]
-                fit_predict_sproc_key
-            ]
-            return fit_sproc
-        fit_predict_sproc_name = snowpark_utils.random_name_for_temp_object(snowpark_utils.TempObjectType.PROCEDURE)
-        relaxed_dependencies = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
-            pkg_versions=model_spec.pkgDependencies, session=self.session
-        )
-        fit_predict_wrapper_sproc = self.session.sproc.register(
-            func=self._build_fit_predict_wrapper_sproc(model_spec=model_spec),
-            is_permanent=False,
-            name=fit_predict_sproc_name,
-            packages=["snowflake-snowpark-python"] + relaxed_dependencies,  # type: ignore[arg-type]
-            replace=True,
-            session=self.session,
-            statement_params=statement_params,
-            execute_as="caller",
-        )
-        self.session._FIT_WRAPPER_SPROCS[  # type: ignore[attr-defined]
-            fit_predict_sproc_key
-        ] = fit_predict_wrapper_sproc
-        return fit_predict_wrapper_sproc
-    def _get_fit_transform_wrapper_sproc_anonymous(self, statement_params: Dict[str, str]) -> StoredProcedure:
+    def _get_fit_transform_wrapper_sproc(self, statement_params: Dict[str, str], anonymous: bool) -> StoredProcedure:
         model_spec = ModelSpecificationsBuilder.build(model=self.estimator)
         fit_transform_sproc_name = snowpark_utils.random_name_for_temp_object(snowpark_utils.TempObjectType.PROCEDURE)
@@ -553,44 +506,8 @@ class SnowparkModelTrainer:
             replace=True,
             session=self.session,
             statement_params=statement_params,
-            anonymous=True,
-            execute_as="caller",
+            anonymous=anonymous,
         )
-        return fit_transform_wrapper_sproc
-    def _get_fit_transform_wrapper_sproc(self, statement_params: Dict[str, str]) -> StoredProcedure:
-        # If the sproc already exists, don't register.
-        if not hasattr(self.session, "_FIT_WRAPPER_SPROCS"):
-            self.session._FIT_WRAPPER_SPROCS: Dict[str, StoredProcedure] = {}  # type: ignore[attr-defined, misc]
-        model_spec = ModelSpecificationsBuilder.build(model=self.estimator)
-        fit_transform_sproc_key = model_spec.__class__.__name__ + "_fit_transform"
-        if fit_transform_sproc_key in self.session._FIT_WRAPPER_SPROCS:  # type: ignore[attr-defined]
-            fit_sproc: StoredProcedure = self.session._FIT_WRAPPER_SPROCS[  # type: ignore[attr-defined]
-                fit_transform_sproc_key
-            ]
-            return fit_sproc
-        fit_transform_sproc_name = snowpark_utils.random_name_for_temp_object(snowpark_utils.TempObjectType.PROCEDURE)
-        relaxed_dependencies = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
-            pkg_versions=model_spec.pkgDependencies, session=self.session
-        )
-        fit_transform_wrapper_sproc = self.session.sproc.register(
-            func=self._build_fit_transform_wrapper_sproc(model_spec=model_spec),
-            is_permanent=False,
-            name=fit_transform_sproc_name,
-            packages=["snowflake-snowpark-python"] + relaxed_dependencies,  # type: ignore[arg-type]
-            replace=True,
-            session=self.session,
-            statement_params=statement_params,
-            execute_as="caller",
-        )
-        self.session._FIT_WRAPPER_SPROCS[  # type: ignore[attr-defined]
-            fit_transform_sproc_key
-        ] = fit_transform_wrapper_sproc
         return fit_transform_wrapper_sproc
@@ -629,9 +546,9 @@ class SnowparkModelTrainer:
         # Call fit sproc
         if _ENABLE_ANONYMOUS_SPROC:
-            fit_wrapper_sproc = self._get_fit_wrapper_sproc_anonymous(statement_params=statement_params)
+            fit_wrapper_sproc = self._get_fit_wrapper_sproc(statement_params=statement_params, anonymous=True)
         else:
-            fit_wrapper_sproc = self._get_fit_wrapper_sproc(statement_params=statement_params)
+            fit_wrapper_sproc = self._get_fit_wrapper_sproc(statement_params=statement_params, anonymous=False)
         try:
             sproc_export_file_name: str = fit_wrapper_sproc(
@@ -665,6 +582,7 @@ class SnowparkModelTrainer:
         self,
         expected_output_cols_list: List[str],
         drop_input_cols: Optional[bool] = False,
+        example_output_pd_df: Optional[pd.DataFrame] = None,
     ) -> Tuple[Union[DataFrame, pd.DataFrame], object]:
         """Trains the model by pushing down the compute into Snowflake using stored procedures.
         This API is different from fit itself because it would also provide the predict
@@ -675,6 +593,11 @@ class SnowparkModelTrainer:
                 name as a list. Defaults to None.
             drop_input_cols (Optional[bool]): Boolean to determine drop
                 the input columns from the output dataset or not
+            example_output_pd_df (Optional[pd.DataFrame]): Example output dataframe
+                This is to create a temp table in the client side with df_one_row. This can maintain the same column
+                name and data type as the output dataframe. Within the sproc, we don't need to create another temp table
+                again - instead, we overwrite into this table without changing the schema.
+                This is not used in PandasModelTrainer.
         Returns:
             Tuple[Union[DataFrame, pd.DataFrame], object]: [predicted dataset, estimator]
@@ -702,12 +625,35 @@ class SnowparkModelTrainer:
         # Call fit sproc
         if _ENABLE_ANONYMOUS_SPROC:
-            fit_predict_wrapper_sproc = self._get_fit_predict_wrapper_sproc_anonymous(statement_params=statement_params)
+            fit_predict_wrapper_sproc = self._get_fit_predict_wrapper_sproc(
+                statement_params=statement_params, anonymous=True
+            )
         else:
-            fit_predict_wrapper_sproc = self._get_fit_predict_wrapper_sproc(statement_params=statement_params)
+            fit_predict_wrapper_sproc = self._get_fit_predict_wrapper_sproc(
+                statement_params=statement_params, anonymous=False
+            )
         fit_predict_result_name = snowpark_utils.random_name_for_temp_object(snowpark_utils.TempObjectType.TABLE)
+        # Create a temp table in advance to store the output
+        # This would allow us to use the same table outside the stored procedure
+        if not drop_input_cols:
+            assert example_output_pd_df is not None
+            remove_dataset_col_name_exist_in_output_col = list(set(dataset.columns) - set(example_output_pd_df.columns))
+            pd_df_one_row = (
+                dataset.select(remove_dataset_col_name_exist_in_output_col)
+                .limit(1)
+                .to_pandas(statement_params=statement_params)
+            )
+            example_output_pd_df = pd.concat([pd_df_one_row, example_output_pd_df], axis=1)
+        self.session.write_pandas(
+            example_output_pd_df,
+            fit_predict_result_name,
+            auto_create_table=True,
+            table_type="temp",
+        )
         sproc_export_file_name: str = fit_predict_wrapper_sproc(
             self.session,
             queries,
@@ -769,14 +715,32 @@ class SnowparkModelTrainer:
         # Call fit sproc
         if _ENABLE_ANONYMOUS_SPROC:
-            fit_transform_wrapper_sproc = self._get_fit_transform_wrapper_sproc_anonymous(
-                statement_params=statement_params
+            fit_transform_wrapper_sproc = self._get_fit_transform_wrapper_sproc(
+                statement_params=statement_params, anonymous=True
             )
         else:
-            fit_transform_wrapper_sproc = self._get_fit_transform_wrapper_sproc(statement_params=statement_params)
+            fit_transform_wrapper_sproc = self._get_fit_transform_wrapper_sproc(
+                statement_params=statement_params, anonymous=False
+            )
         fit_transform_result_name = snowpark_utils.random_name_for_temp_object(snowpark_utils.TempObjectType.TABLE)
+        # Create a temp table in advance to store the output
+        # This would allow us to use the same table outside the stored procedure
+        df_one_line = dataset.limit(1).to_pandas(statement_params=statement_params)
+        df_one_line[
+            expected_output_cols_list[0]
+        ] = "[0]"  # Add one column as the output_col; this is a dummy value to represent the OBJECT type
+        if drop_input_cols:
+            self.session.write_pandas(
+                df_one_line[expected_output_cols_list[0]],
+                fit_transform_result_name,
+                auto_create_table=True,
+                table_type="temp",
+            )
+        else:
+            self.session.write_pandas(df_one_line, fit_transform_result_name, auto_create_table=True, table_type="temp")
         sproc_export_file_name: str = fit_transform_wrapper_sproc(
             self.session,
             queries,

snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py CHANGED Viewed

@@ -303,7 +303,6 @@ class XGBoostExternalMemoryTrainer(SnowparkModelTrainer):
             statement_params=statement_params,
             anonymous=True,
             imports=list(import_file_paths),
-            execute_as="caller",
         )  # type: ignore[misc]
         def fit_wrapper_sproc(
             session: Session,

snowflake-ml-python 1.6.1__py3-none-any.whl → 1.6.3__py3-none-any.whl

snowflake-ml-python 1.6.1py3-none-any.whl → 1.6.3py3-none-any.whl