PyPI - snowflake-ml-python - Versions diffs - 1.4.1__py3-none-any.whl → 1.5.0__py3-none-any.whl - Mend

snowflake-ml-python 1.4.1 (py3-none-any.whl) → 1.5.0 (py3-none-any.whl)

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (206)

snowflake/ml/_internal/env_utils.py +66 -31
snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
snowflake/ml/_internal/exceptions/error_codes.py +3 -0
snowflake/ml/_internal/lineage/data_source.py +10 -0
snowflake/ml/_internal/lineage/dataset_dataframe.py +44 -0
snowflake/ml/dataset/__init__.py +10 -0
snowflake/ml/dataset/dataset.py +454 -129
snowflake/ml/dataset/dataset_factory.py +53 -0
snowflake/ml/dataset/dataset_metadata.py +103 -0
snowflake/ml/dataset/dataset_reader.py +202 -0
snowflake/ml/feature_store/feature_store.py +408 -282
snowflake/ml/feature_store/feature_view.py +37 -8
snowflake/ml/fileset/embedded_stage_fs.py +146 -0
snowflake/ml/fileset/sfcfs.py +0 -4
snowflake/ml/fileset/snowfs.py +159 -0
snowflake/ml/fileset/stage_fs.py +1 -4
snowflake/ml/model/__init__.py +2 -2
snowflake/ml/model/_api.py +16 -1
snowflake/ml/model/_client/model/model_impl.py +27 -0
snowflake/ml/model/_client/model/model_version_impl.py +135 -0
snowflake/ml/model/_client/ops/model_ops.py +137 -67
snowflake/ml/model/_client/sql/model.py +16 -14
snowflake/ml/model/_client/sql/model_version.py +109 -1
snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
snowflake/ml/model/_model_composer/model_composer.py +22 -1
snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +22 -0
snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +11 -0
snowflake/ml/model/_packager/model_env/model_env.py +41 -0
snowflake/ml/model/_packager/model_meta/model_meta.py +1 -5
snowflake/ml/model/_packager/model_packager.py +0 -3
snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
snowflake/ml/modeling/_internal/model_trainer.py +7 -0
snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +24 -2
snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +261 -16
snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -52
snowflake/ml/modeling/cluster/affinity_propagation.py +51 -52
snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -52
snowflake/ml/modeling/cluster/birch.py +53 -52
snowflake/ml/modeling/cluster/bisecting_k_means.py +53 -52
snowflake/ml/modeling/cluster/dbscan.py +51 -52
snowflake/ml/modeling/cluster/feature_agglomeration.py +53 -52
snowflake/ml/modeling/cluster/k_means.py +53 -52
snowflake/ml/modeling/cluster/mean_shift.py +51 -52
snowflake/ml/modeling/cluster/mini_batch_k_means.py +53 -52
snowflake/ml/modeling/cluster/optics.py +51 -52
snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -52
snowflake/ml/modeling/cluster/spectral_clustering.py +51 -52
snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -52
snowflake/ml/modeling/compose/column_transformer.py +53 -52
snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -52
snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -52
snowflake/ml/modeling/covariance/empirical_covariance.py +51 -52
snowflake/ml/modeling/covariance/graphical_lasso.py +51 -52
snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -52
snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -52
snowflake/ml/modeling/covariance/min_cov_det.py +51 -52
snowflake/ml/modeling/covariance/oas.py +51 -52
snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -52
snowflake/ml/modeling/decomposition/dictionary_learning.py +53 -52
snowflake/ml/modeling/decomposition/factor_analysis.py +53 -52
snowflake/ml/modeling/decomposition/fast_ica.py +53 -52
snowflake/ml/modeling/decomposition/incremental_pca.py +53 -52
snowflake/ml/modeling/decomposition/kernel_pca.py +53 -52
snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +53 -52
snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +53 -52
snowflake/ml/modeling/decomposition/pca.py +53 -52
snowflake/ml/modeling/decomposition/sparse_pca.py +53 -52
snowflake/ml/modeling/decomposition/truncated_svd.py +53 -52
snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +53 -52
snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -52
snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -52
snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -52
snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -52
snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -52
snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -52
snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -52
snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -52
snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -52
snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -52
snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -52
snowflake/ml/modeling/ensemble/isolation_forest.py +51 -52
snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -52
snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -52
snowflake/ml/modeling/ensemble/stacking_regressor.py +53 -52
snowflake/ml/modeling/ensemble/voting_classifier.py +53 -52
snowflake/ml/modeling/ensemble/voting_regressor.py +53 -52
snowflake/ml/modeling/feature_selection/generic_univariate_select.py +53 -52
snowflake/ml/modeling/feature_selection/select_fdr.py +53 -52
snowflake/ml/modeling/feature_selection/select_fpr.py +53 -52
snowflake/ml/modeling/feature_selection/select_fwe.py +53 -52
snowflake/ml/modeling/feature_selection/select_k_best.py +53 -52
snowflake/ml/modeling/feature_selection/select_percentile.py +53 -52
snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +53 -52
snowflake/ml/modeling/feature_selection/variance_threshold.py +53 -52
snowflake/ml/modeling/framework/base.py +63 -36
snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -52
snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -52
snowflake/ml/modeling/impute/iterative_imputer.py +53 -52
snowflake/ml/modeling/impute/knn_imputer.py +53 -52
snowflake/ml/modeling/impute/missing_indicator.py +53 -52
snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +53 -52
snowflake/ml/modeling/kernel_approximation/nystroem.py +53 -52
snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +53 -52
snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +53 -52
snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +53 -52
snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -52
snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -52
snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -52
snowflake/ml/modeling/linear_model/ard_regression.py +51 -52
snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -52
snowflake/ml/modeling/linear_model/elastic_net.py +51 -52
snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -52
snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -52
snowflake/ml/modeling/linear_model/huber_regressor.py +51 -52
snowflake/ml/modeling/linear_model/lars.py +51 -52
snowflake/ml/modeling/linear_model/lars_cv.py +51 -52
snowflake/ml/modeling/linear_model/lasso.py +51 -52
snowflake/ml/modeling/linear_model/lasso_cv.py +51 -52
snowflake/ml/modeling/linear_model/lasso_lars.py +51 -52
snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -52
snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -52
snowflake/ml/modeling/linear_model/linear_regression.py +51 -52
snowflake/ml/modeling/linear_model/logistic_regression.py +51 -52
snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -52
snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -52
snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -52
snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -52
snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -52
snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -52
snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -52
snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -52
snowflake/ml/modeling/linear_model/perceptron.py +51 -52
snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -52
snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -52
snowflake/ml/modeling/linear_model/ridge.py +51 -52
snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -52
snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -52
snowflake/ml/modeling/linear_model/ridge_cv.py +51 -52
snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -52
snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -52
snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -52
snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -52
snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -52
snowflake/ml/modeling/manifold/isomap.py +53 -52
snowflake/ml/modeling/manifold/mds.py +53 -52
snowflake/ml/modeling/manifold/spectral_embedding.py +53 -52
snowflake/ml/modeling/manifold/tsne.py +53 -52
snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -52
snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -52
snowflake/ml/modeling/model_selection/grid_search_cv.py +21 -23
snowflake/ml/modeling/model_selection/randomized_search_cv.py +38 -20
snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -52
snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -52
snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -52
snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -52
snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -52
snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -52
snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -52
snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -52
snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -52
snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -52
snowflake/ml/modeling/neighbors/kernel_density.py +51 -52
snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -52
snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -52
snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -52
snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +53 -52
snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -52
snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -52
snowflake/ml/modeling/neural_network/bernoulli_rbm.py +53 -52
snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -52
snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -52
snowflake/ml/modeling/pipeline/pipeline.py +514 -32
snowflake/ml/modeling/preprocessing/one_hot_encoder.py +12 -0
snowflake/ml/modeling/preprocessing/polynomial_features.py +53 -52
snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -52
snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -52
snowflake/ml/modeling/svm/linear_svc.py +51 -52
snowflake/ml/modeling/svm/linear_svr.py +51 -52
snowflake/ml/modeling/svm/nu_svc.py +51 -52
snowflake/ml/modeling/svm/nu_svr.py +51 -52
snowflake/ml/modeling/svm/svc.py +51 -52
snowflake/ml/modeling/svm/svr.py +51 -52
snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -52
snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -52
snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -52
snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -52
snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -52
snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -52
snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -52
snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -52
snowflake/ml/registry/model_registry.py +3 -149
snowflake/ml/version.py +1 -1
{snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.0.dist-info}/METADATA +63 -2
{snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.0.dist-info}/RECORD +204 -196
snowflake/ml/registry/_artifact_manager.py +0 -156
snowflake/ml/registry/artifact.py +0 -46
{snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.0.dist-info}/LICENSE.txt +0 -0
{snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.0.dist-info}/WHEEL +0 -0
{snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.0.dist-info}/top_level.txt +0 -0

snowflake/ml/model/_deploy_client/snowservice/deploy.py CHANGED Viewed

@@ -346,6 +346,7 @@ class SnowServiceDeployment:
         (db, schema, _, _) = identifier.parse_schema_level_object_identifier(service_func_name)
         self._service_name = identifier.get_schema_level_object_identifier(db, schema, f"service_{model_id}")
+        self._job_name = identifier.get_schema_level_object_identifier(db, schema, f"build_{model_id}")
         # Spec file and future deployment related artifacts will be stored under {stage}/models/{model_id}
         self._model_artifact_stage_location = posixpath.join(deployment_stage_path, "models", self.id)
         self.debug_dir: Optional[str] = None
@@ -468,6 +469,7 @@ class SnowServiceDeployment:
                 session=self.session,
                 artifact_stage_location=self._model_artifact_stage_location,
                 compute_pool=self.options.compute_pool,
+                job_name=self._job_name,
                 external_access_integrations=self.options.external_access_integrations,
             )
         else:

snowflake/ml/model/_deploy_client/utils/constants.py CHANGED Viewed

@@ -17,11 +17,6 @@ class ResourceStatus(Enum):
     INTERNAL_ERROR = "INTERNAL_ERROR"  # there was an internal service error.
-RESOURCE_TO_STATUS_FUNCTION_MAPPING = {
-    ResourceType.SERVICE: "SYSTEM$GET_SERVICE_STATUS",
-    ResourceType.JOB: "SYSTEM$GET_JOB_STATUS",
-}
 PREDICT = "predict"
 STAGE = "stage"
 COMPUTE_POOL = "compute_pool"

snowflake/ml/model/_deploy_client/utils/snowservice_client.py CHANGED Viewed

@@ -70,13 +70,16 @@ class SnowServiceClient:
         logger.debug(f"Create service with SQL: \n {sql}")
         self.session.sql(sql).collect()
-    def create_job(self, compute_pool: str, spec_stage_location: str, external_access_integrations: List[str]) -> None:
+    def create_job(
+        self, job_name: str, compute_pool: str, spec_stage_location: str, external_access_integrations: List[str]
+    ) -> None:
         """Execute the job creation SQL command. Note that the job creation is synchronous, hence we execute it in a
         async way so that we can query the log in the meantime.
         Upon job failure, full job container log will be logged.
         Args:
+            job_name: name of the job
             compute_pool: name of the compute pool
             spec_stage_location: path to the stage location where the spec is located at.
             external_access_integrations: EAIs for network connection.
@@ -84,19 +87,18 @@ class SnowServiceClient:
         stage, path = uri.get_stage_and_path(spec_stage_location)
         sql = textwrap.dedent(
             f"""
-            EXECUTE SERVICE
+            EXECUTE JOB SERVICE
             IN COMPUTE POOL {compute_pool}
             FROM {stage}
-            SPEC = '{path}'
+            SPECIFICATION_FILE = '{path}'
+            NAME = {job_name}
             EXTERNAL_ACCESS_INTEGRATIONS = ({', '.join(external_access_integrations)})
             """
         )
         logger.debug(f"Create job with SQL: \n {sql}")
-        cur = self.session._conn._conn.cursor()
-        cur.execute_async(sql)
-        job_id = cur._sfqid
+        self.session.sql(sql).collect_nowait()
         self.block_until_resource_is_ready(
-            resource_name=str(job_id),
+            resource_name=job_name,
             resource_type=constants.ResourceType.JOB,
             container_name=constants.KANIKO_CONTAINER_NAME,
             max_retries=240,
@@ -182,10 +184,7 @@ class SnowServiceClient:
         """
         assert resource_type == constants.ResourceType.SERVICE or resource_type == constants.ResourceType.JOB
         query_command = ""
-        if resource_type == constants.ResourceType.SERVICE:
-            query_command = f"CALL SYSTEM$GET_SERVICE_LOGS('{resource_name}', '0', '{container_name}')"
-        elif resource_type == constants.ResourceType.JOB:
-            query_command = f"CALL SYSTEM$GET_JOB_LOGS('{resource_name}', '{container_name}')"
+        query_command = f"CALL SYSTEM$GET_SERVICE_LOGS('{resource_name}', '0', '{container_name}')"
         logger.warning(
             f"Best-effort log streaming from SPCS will be enabled when python logging level is set to INFO."
             f"Alternatively, you can also query the logs by running the query '{query_command}'"
@@ -201,7 +200,7 @@ class SnowServiceClient:
                 )
                 lsp.process_new_logs(resource_log, log_level=logging.INFO)
-            status = self.get_resource_status(resource_name=resource_name, resource_type=resource_type)
+            status = self.get_resource_status(resource_name=resource_name)
             if resource_type == constants.ResourceType.JOB and status == constants.ResourceStatus.DONE:
                 return
@@ -246,52 +245,24 @@ class SnowServiceClient:
     def get_resource_log(
         self, resource_name: str, resource_type: constants.ResourceType, container_name: str
     ) -> Optional[str]:
-        if resource_type == constants.ResourceType.SERVICE:
-            try:
-                row = self.session.sql(
-                    f"CALL SYSTEM$GET_SERVICE_LOGS('{resource_name}', '0', '{container_name}')"
-                ).collect()
-                return str(row[0]["SYSTEM$GET_SERVICE_LOGS"])
-            except Exception:
-                return None
-        elif resource_type == constants.ResourceType.JOB:
-            try:
-                row = self.session.sql(f"CALL SYSTEM$GET_JOB_LOGS('{resource_name}', '{container_name}')").collect()
-                return str(row[0]["SYSTEM$GET_JOB_LOGS"])
-            except Exception:
-                return None
-        else:
-            raise snowml_exceptions.SnowflakeMLException(
-                error_code=error_codes.NOT_IMPLEMENTED,
-                original_exception=NotImplementedError(
-                    f"{resource_type.name} is not yet supported in get_resource_log function"
-                ),
-            )
-    def get_resource_status(
-        self, resource_name: str, resource_type: constants.ResourceType
-    ) -> Optional[constants.ResourceStatus]:
+        try:
+            row = self.session.sql(
+                f"CALL SYSTEM$GET_SERVICE_LOGS('{resource_name}', '0', '{container_name}')"
+            ).collect()
+            return str(row[0]["SYSTEM$GET_SERVICE_LOGS"])
+        except Exception:
+            return None
+    def get_resource_status(self, resource_name: str) -> Optional[constants.ResourceStatus]:
         """Get resource status.
         Args:
             resource_name: Name of the resource.
-            resource_type: Type of the resource.
-        Raises:
-            SnowflakeMLException: If resource type does not have a corresponding system function for querying status.
-            SnowflakeMLException: If corresponding status call failed.
         Returns:
             Optional[constants.ResourceStatus]: The status of the resource, or None if the resource status is empty.
         """
-        if resource_type not in constants.RESOURCE_TO_STATUS_FUNCTION_MAPPING:
-            raise snowml_exceptions.SnowflakeMLException(
-                error_code=error_codes.INVALID_ARGUMENT,
-                original_exception=ValueError(
-                    f"Status querying is not supported for resources of type '{resource_type}'."
-                ),
-            )
-        status_func = constants.RESOURCE_TO_STATUS_FUNCTION_MAPPING[resource_type]
+        status_func = "SYSTEM$GET_SERVICE_STATUS"
         try:
             row = self.session.sql(f"CALL {status_func}('{resource_name}');").collect()
         except Exception:

snowflake/ml/model/_model_composer/model_composer.py CHANGED Viewed

@@ -8,8 +8,10 @@ from typing import Any, Dict, List, Optional
 from absl import logging
 from packaging import requirements
+from typing_extensions import deprecated
 from snowflake.ml._internal import env as snowml_env, env_utils, file_utils
+from snowflake.ml._internal.lineage import data_source
 from snowflake.ml.model import model_signature, type_hints as model_types
 from snowflake.ml.model._model_composer.model_manifest import model_manifest
 from snowflake.ml.model._packager import model_packager
@@ -134,6 +136,7 @@ class ModelComposer:
             model_meta=self.packager.meta,
             model_file_rel_path=pathlib.PurePosixPath(self.model_file_rel_path),
             options=options,
+            data_sources=self._get_data_sources(model),
         )
         file_utils.upload_directory_to_stage(
@@ -143,7 +146,8 @@ class ModelComposer:
             statement_params=self._statement_params,
         )
-    def load(
+    @deprecated("Only used by PrPr model registry. Use static method version of load instead.")
+    def legacy_load(
         self,
         *,
         meta_only: bool = False,
@@ -163,3 +167,20 @@ class ModelComposer:
         with zipfile.ZipFile(self.model_local_path, mode="r", compression=zipfile.ZIP_DEFLATED) as zf:
             zf.extractall(path=self._packager_workspace_path)
         self.packager.load(meta_only=meta_only, options=options)
+    @staticmethod
+    def load(
+        workspace_path: pathlib.Path,
+        *,
+        meta_only: bool = False,
+        options: Optional[model_types.ModelLoadOption] = None,
+    ) -> model_packager.ModelPackager:
+        mp = model_packager.ModelPackager(str(workspace_path / ModelComposer.MODEL_DIR_REL_PATH))
+        mp.load(meta_only=meta_only, options=options)
+        return mp
+    def _get_data_sources(self, model: model_types.SupportedModelType) -> Optional[List[data_source.DataSource]]:
+        data_sources = getattr(model, "_data_sources", None)
+        if isinstance(data_sources, list) and all(isinstance(item, data_source.DataSource) for item in data_sources):
+            return data_sources
+        return None

snowflake/ml/model/_model_composer/model_manifest/model_manifest.py CHANGED Viewed

@@ -5,6 +5,7 @@ from typing import List, Optional, cast
 import yaml
+from snowflake.ml._internal.lineage import data_source
 from snowflake.ml.model import type_hints
 from snowflake.ml.model._model_composer.model_manifest import model_manifest_schema
 from snowflake.ml.model._model_composer.model_method import (
@@ -36,6 +37,7 @@ class ModelManifest:
         model_meta: model_meta_api.ModelMetadata,
         model_file_rel_path: pathlib.PurePosixPath,
         options: Optional[type_hints.ModelSaveOption] = None,
+        data_sources: Optional[List[data_source.DataSource]] = None,
     ) -> None:
         if options is None:
             options = {}
@@ -90,6 +92,10 @@ class ModelManifest:
             ],
         )
+        lineage_sources = self._extract_lineage_info(data_sources)
+        if lineage_sources:
+            manifest_dict["lineage_sources"] = lineage_sources
         with (self.workspace_path / ModelManifest.MANIFEST_FILE_REL_PATH).open("w", encoding="utf-8") as f:
             # Anchors are not supported in the server, avoid that.
             yaml.SafeDumper.ignore_aliases = lambda *args: True  # type: ignore[method-assign]
@@ -108,3 +114,19 @@ class ModelManifest:
         res = cast(model_manifest_schema.ModelManifestDict, raw_input)
         return res
+    def _extract_lineage_info(
+        self, data_sources: Optional[List[data_source.DataSource]]
+    ) -> List[model_manifest_schema.LineageSourceDict]:
+        result = []
+        if data_sources:
+            for source in data_sources:
+                result.append(
+                    model_manifest_schema.LineageSourceDict(
+                        # Currently, we only support lineage from Dataset.
+                        type=model_manifest_schema.LineageSourceTypes.DATASET.value,
+                        entity=source.fully_qualified_name,
+                        version=source.version,
+                    )
+                )
+        return result

snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py CHANGED Viewed

@@ -75,8 +75,19 @@ class SnowparkMLDataDict(TypedDict):
     functions: Required[List[ModelFunctionInfoDict]]
+class LineageSourceTypes(enum.Enum):
+    DATASET = "DATASET"
+class LineageSourceDict(TypedDict):
+    type: Required[str]
+    entity: Required[str]
+    version: NotRequired[str]
 class ModelManifestDict(TypedDict):
     manifest_version: Required[str]
     runtimes: Required[Dict[str, ModelRuntimeDict]]
     methods: Required[List[ModelMethodDict]]
     user_data: NotRequired[Dict[str, Any]]
+    lineage_sources: NotRequired[List[LineageSourceDict]]

snowflake/ml/model/_packager/model_env/model_env.py CHANGED Viewed

@@ -284,6 +284,7 @@ class ModelEnv:
                         " This may prevent model deploying to Snowflake Warehouse."
                     ),
                     category=UserWarning,
+                    stacklevel=2,
                 )
             if len(channel_dependencies) == 0 and channel not in self._conda_dependencies:
                 warnings.warn(
@@ -292,6 +293,7 @@ class ModelEnv:
                         " This may prevent model deploying to Snowflake Warehouse."
                     ),
                     category=UserWarning,
+                    stacklevel=2,
                 )
                 self._conda_dependencies[channel] = []
@@ -307,6 +309,7 @@ class ModelEnv:
                             " This may be unintentional."
                         ),
                         category=UserWarning,
+                        stacklevel=2,
                     )
         if pip_requirements_list:
@@ -316,6 +319,7 @@ class ModelEnv:
                     " This may prevent model deploying to Snowflake Warehouse."
                 ),
                 category=UserWarning,
+                stacklevel=2,
             )
             for pip_dependency in pip_requirements_list:
                 if any(
@@ -338,6 +342,7 @@ class ModelEnv:
                     " This may prevent model deploying to Snowflake Warehouse."
                 ),
                 category=UserWarning,
+                stacklevel=2,
             )
             for pip_dependency in pip_requirements_list:
                 if any(
@@ -372,3 +377,39 @@ class ModelEnv:
             "cuda_version": self.cuda_version,
             "snowpark_ml_version": self.snowpark_ml_version,
         }
+    def validate_with_local_env(
+        self, check_snowpark_ml_version: bool = False
+    ) -> List[env_utils.IncorrectLocalEnvironmentError]:
+        errors = []
+        try:
+            env_utils.validate_py_runtime_version(str(self._python_version))
+        except env_utils.IncorrectLocalEnvironmentError as e:
+            errors.append(e)
+        for conda_reqs in self._conda_dependencies.values():
+            for conda_req in conda_reqs:
+                try:
+                    env_utils.validate_local_installed_version_of_pip_package(
+                        env_utils.try_convert_conda_requirement_to_pip(conda_req)
+                    )
+                except env_utils.IncorrectLocalEnvironmentError as e:
+                    errors.append(e)
+        for pip_req in self._pip_requirements:
+            try:
+                env_utils.validate_local_installed_version_of_pip_package(pip_req)
+            except env_utils.IncorrectLocalEnvironmentError as e:
+                errors.append(e)
+        if check_snowpark_ml_version:
+            # For Modeling model
+            if self._snowpark_ml_version.base_version != snowml_env.VERSION:
+                errors.append(
+                    env_utils.IncorrectLocalEnvironmentError(
+                        f"The local installed version of Snowpark ML library is {snowml_env.VERSION} "
+                        f"which differs from required version {self.snowpark_ml_version}."
+                    )
+                )
+        return errors

snowflake/ml/model/_packager/model_meta/model_meta.py CHANGED Viewed

@@ -320,11 +320,7 @@ class ModelMetadata:
         with open(model_yaml_path, "w", encoding="utf-8") as out:
             yaml.SafeDumper.ignore_aliases = lambda *args: True  # type: ignore[method-assign]
-            yaml.safe_dump(
-                model_dict,
-                stream=out,
-                default_flow_style=False,
-            )
+            yaml.safe_dump(model_dict, stream=out, default_flow_style=False)
     @staticmethod
     def _validate_model_metadata(loaded_meta: Any) -> model_meta_schema.ModelMetadataDict:

snowflake/ml/model/_packager/model_packager.py CHANGED Viewed

@@ -4,7 +4,6 @@ from typing import Dict, List, Optional
 from absl import logging
-from snowflake.ml._internal import env_utils
 from snowflake.ml._internal.exceptions import (
     error_codes,
     exceptions as snowml_exceptions,
@@ -129,8 +128,6 @@ class ModelPackager:
         model_meta.load_code_path(self.local_dir_path)
-        env_utils.validate_py_runtime_version(self.meta.env.python_version)
         handler = model_handler.load_handler(self.meta.model_type)
         if handler is None:
             raise snowml_exceptions.SnowflakeMLException(

snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py CHANGED Viewed

@@ -3,6 +3,8 @@ from typing import List, Optional, Tuple
 import pandas as pd
+from snowflake.ml.modeling._internal.estimator_utils import handle_inference_result
 class PandasModelTrainer:
     """
@@ -72,11 +74,61 @@ class PandasModelTrainer:
             Tuple[pd.DataFrame, object]: [predicted dataset, estimator]
         """
         assert hasattr(self.estimator, "fit_predict")  # make type checker happy
-        args = {"X": self.dataset[self.input_cols]}
-        result = self.estimator.fit_predict(**args)
+        result = self.estimator.fit_predict(X=self.dataset[self.input_cols])
         result_df = pd.DataFrame(data=result, columns=expected_output_cols_list)
         if drop_input_cols:
             result_df = result_df
         else:
-            result_df = pd.concat([self.dataset, result_df], axis=1)
+            # In case the output column names overlap with the input column names,
+            # drop the overlapping columns from the input dataset before concatenating.
+            remove_dataset_col_name_exist_in_output_col = list(
+                set(self.dataset.columns) - set(expected_output_cols_list)
+            )
+            result_df = pd.concat([self.dataset[remove_dataset_col_name_exist_in_output_col], result_df], axis=1)
+        return (result_df, self.estimator)
+    def train_fit_transform(
+        self,
+        expected_output_cols_list: List[str],
+        drop_input_cols: Optional[bool] = False,
+    ) -> Tuple[pd.DataFrame, object]:
+        """Trains the model using specified features and target columns from the dataset.
+        This API is different from fit itself because it would also provide the transform
+        output.
+        Args:
+            expected_output_cols_list (List[str]): The output column
+                names as a list.
+            drop_input_cols (Optional[bool]): Boolean to determine whether to
+                drop the input columns from the output dataset.
+        Returns:
+            Tuple[pd.DataFrame, object]: [transformed dataset, estimator]
+        """
+        assert hasattr(self.estimator, "fit")  # make type checker happy
+        assert hasattr(self.estimator, "fit_transform")  # make type checker happy
+        argspec = inspect.getfullargspec(self.estimator.fit)
+        args = {"X": self.dataset[self.input_cols]}
+        if self.label_cols:
+            label_arg_name = "Y" if "Y" in argspec.args else "y"
+            args[label_arg_name] = self.dataset[self.label_cols].squeeze()
+        if self.sample_weight_col is not None and "sample_weight" in argspec.args:
+            args["sample_weight"] = self.dataset[self.sample_weight_col].squeeze()
+        inference_res = self.estimator.fit_transform(**args)
+        transformed_numpy_array, output_cols = handle_inference_result(
+            inference_res=inference_res, output_cols=expected_output_cols_list, inference_method="fit_transform"
+        )
+        result_df = pd.DataFrame(data=transformed_numpy_array, columns=output_cols)
+        if drop_input_cols:
+            result_df = result_df
+        else:
+            # In case the output column names overlap with the input column names,
+            # drop the overlapping columns from the input dataset before concatenating.
+            remove_dataset_col_name_exist_in_output_col = list(set(self.dataset.columns) - set(output_cols))
+            result_df = pd.concat([self.dataset[remove_dataset_col_name_exist_in_output_col], result_df], axis=1)
         return (result_df, self.estimator)

snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py CHANGED Viewed

@@ -72,24 +72,40 @@ class MLRuntimeTransformHandlers:
         """
-        handler = SnowparkTransformHandlers(
-            dataset=self.dataset,
-            estimator=self.estimator,
-            class_name=self._class_name,
-            subproject=self._subproject,
-            autogenerated=self._autogenerated,
-        )
-        return handler.batch_inference(
-            inference_method,
-            input_cols,
-            expected_output_cols,
-            session,
-            dependencies,
-            drop_input_cols,
-            expected_output_cols_type,
-            *args,
-            **kwargs,
-        )
+        mlrs_inference_methods = ["predict", "predict_proba", "predict_log_proba"]
+        if inference_method in mlrs_inference_methods:
+            result_df = self.client.inference(
+                estimator=self.estimator,
+                dataset=self.dataset,
+                inference_method=inference_method,
+                input_cols=input_cols,
+                output_cols=expected_output_cols,
+                drop_input_cols=drop_input_cols,
+            )
+        else:
+            handler = SnowparkTransformHandlers(
+                dataset=self.dataset,
+                estimator=self.estimator,
+                class_name=self._class_name,
+                subproject=self._subproject,
+                autogenerated=self._autogenerated,
+            )
+            result_df = handler.batch_inference(
+                inference_method,
+                input_cols,
+                expected_output_cols,
+                session,
+                dependencies,
+                drop_input_cols,
+                expected_output_cols_type,
+                *args,
+                **kwargs,
+            )
+        assert isinstance(result_df, DataFrame)  # mypy - The MLRS return types are annotated as `object`.
+        return result_df
     def score(
         self,

snowflake/ml/modeling/_internal/model_trainer.py CHANGED Viewed

@@ -22,3 +22,10 @@ class ModelTrainer(Protocol):
         drop_input_cols: Optional[bool] = False,
     ) -> Tuple[Union[DataFrame, pd.DataFrame], object]:
         raise NotImplementedError
+    def train_fit_transform(
+        self,
+        expected_output_cols_list: List[str],
+        drop_input_cols: Optional[bool] = False,
+    ) -> Tuple[Union[DataFrame, pd.DataFrame], object]:
+        raise NotImplementedError

snowflake/ml/modeling/_internal/model_trainer_builder.py CHANGED Viewed

@@ -138,21 +138,13 @@ class ModelTrainerBuilder:
         cls,
         estimator: object,
         dataset: Union[DataFrame, pd.DataFrame],
-        input_cols: Optional[List[str]] = None,
+        input_cols: List[str],
         autogenerated: bool = False,
         subproject: str = "",
     ) -> ModelTrainer:
         """
         Builder method that creates an appropriate ModelTrainer instance based on the given params.
         """
-        if input_cols is None:
-            raise exceptions.SnowflakeMLException(
-                error_code=error_codes.NOT_FOUND,
-                original_exception=ValueError(
-                    "The input column names (input_cols) is None.\n"
-                    "Please put your input_cols when initializing the estimator\n"
-                ),
-            )
         if isinstance(dataset, pd.DataFrame):
             return PandasModelTrainer(
                 estimator=estimator,
@@ -179,3 +171,44 @@ class ModelTrainerBuilder:
                 f"Unexpected dataset type: {type(dataset)}."
                 "Supported dataset types: snowpark.DataFrame, pandas.DataFrame."
             )
+    @classmethod
+    def build_fit_transform(
+        cls,
+        estimator: object,
+        dataset: Union[DataFrame, pd.DataFrame],
+        input_cols: List[str],
+        label_cols: Optional[List[str]] = None,
+        sample_weight_col: Optional[str] = None,
+        autogenerated: bool = False,
+        subproject: str = "",
+    ) -> ModelTrainer:
+        """
+        Builder method that creates an appropriate ModelTrainer instance based on the given params.
+        """
+        if isinstance(dataset, pd.DataFrame):
+            return PandasModelTrainer(
+                estimator=estimator,
+                dataset=dataset,
+                input_cols=input_cols,
+                label_cols=label_cols,
+                sample_weight_col=sample_weight_col,
+            )
+        elif isinstance(dataset, DataFrame):
+            trainer_klass = SnowparkModelTrainer
+            init_args = {
+                "estimator": estimator,
+                "dataset": dataset,
+                "session": dataset._session,
+                "input_cols": input_cols,
+                "label_cols": label_cols,
+                "sample_weight_col": sample_weight_col,
+                "autogenerated": autogenerated,
+                "subproject": subproject,
+            }
+            return trainer_klass(**init_args)  # type: ignore[arg-type]
+        else:
+            raise TypeError(
+                f"Unexpected dataset type: {type(dataset)}."
+                "Supported dataset types: snowpark.DataFrame, pandas.DataFrame."
+            )

snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py CHANGED Viewed

@@ -9,7 +9,11 @@ import cloudpickle as cp
 import pandas as pd
 from snowflake.ml._internal import telemetry
-from snowflake.ml._internal.utils import identifier, snowpark_dataframe_utils
+from snowflake.ml._internal.utils import (
+    identifier,
+    pkg_version_utils,
+    snowpark_dataframe_utils,
+)
 from snowflake.ml._internal.utils.query_result_checker import SqlResultValidator
 from snowflake.ml._internal.utils.temp_file_utils import (
     cleanup_temp_files,
@@ -91,6 +95,7 @@ class SnowparkTransformHandlers:
             A new dataset of the same type as the input dataset.
         """
+        dependencies = self._get_validated_snowpark_dependencies(session, dependencies)
         dataset = self.dataset
         estimator = self.estimator
         # Register vectorized UDF for batch inference
@@ -210,7 +215,8 @@ class SnowparkTransformHandlers:
         Returns:
             An accuracy score for the model on the given test data.
         """
+        dependencies = self._get_validated_snowpark_dependencies(session, dependencies)
+        dependencies.append("snowflake-snowpark-python")
         dataset = self.dataset
         estimator = self.estimator
         dataset = snowpark_dataframe_utils.cast_snowpark_dataframe_column_types(dataset)
@@ -335,3 +341,19 @@ class SnowparkTransformHandlers:
         cleanup_temp_files([local_score_file_name])
         return score
+    def _get_validated_snowpark_dependencies(self, session: Session, dependencies: List[str]) -> List[str]:
+        """A helper function to validate dependencies and return the available packages that exists
+        in the snowflake anaconda channel
+        Args:
+            session: the active snowpark Session
+            dependencies: unvalidated dependencies
+        Returns:
+            A list of packages present in the snowflake conda channel.
+        """
+        return pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
+            pkg_versions=dependencies, session=session, subproject=self._subproject
+        )

snowflake-ml-python 1.4.1__py3-none-any.whl → 1.5.0__py3-none-any.whl

snowflake-ml-python 1.4.1py3-none-any.whl → 1.5.0py3-none-any.whl