snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/file_utils.py +8 -35
- snowflake/ml/_internal/utils/identifier.py +74 -7
- snowflake/ml/model/_core_requirements.py +1 -1
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +5 -26
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +2 -2
- snowflake/ml/model/_handlers/_base.py +3 -1
- snowflake/ml/model/_handlers/sklearn.py +1 -0
- snowflake/ml/model/_handlers/xgboost.py +1 -1
- snowflake/ml/model/_model.py +24 -19
- snowflake/ml/model/_model_meta.py +24 -15
- snowflake/ml/model/type_hints.py +5 -11
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +28 -17
- snowflake/ml/modeling/cluster/affinity_propagation.py +28 -17
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +28 -17
- snowflake/ml/modeling/cluster/birch.py +28 -17
- snowflake/ml/modeling/cluster/bisecting_k_means.py +28 -17
- snowflake/ml/modeling/cluster/dbscan.py +28 -17
- snowflake/ml/modeling/cluster/feature_agglomeration.py +28 -17
- snowflake/ml/modeling/cluster/k_means.py +28 -17
- snowflake/ml/modeling/cluster/mean_shift.py +28 -17
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +28 -17
- snowflake/ml/modeling/cluster/optics.py +28 -17
- snowflake/ml/modeling/cluster/spectral_biclustering.py +28 -17
- snowflake/ml/modeling/cluster/spectral_clustering.py +28 -17
- snowflake/ml/modeling/cluster/spectral_coclustering.py +28 -17
- snowflake/ml/modeling/compose/column_transformer.py +28 -17
- snowflake/ml/modeling/compose/transformed_target_regressor.py +28 -17
- snowflake/ml/modeling/covariance/elliptic_envelope.py +28 -17
- snowflake/ml/modeling/covariance/empirical_covariance.py +28 -17
- snowflake/ml/modeling/covariance/graphical_lasso.py +28 -17
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +28 -17
- snowflake/ml/modeling/covariance/ledoit_wolf.py +28 -17
- snowflake/ml/modeling/covariance/min_cov_det.py +28 -17
- snowflake/ml/modeling/covariance/oas.py +28 -17
- snowflake/ml/modeling/covariance/shrunk_covariance.py +28 -17
- snowflake/ml/modeling/decomposition/dictionary_learning.py +28 -17
- snowflake/ml/modeling/decomposition/factor_analysis.py +28 -17
- snowflake/ml/modeling/decomposition/fast_ica.py +28 -17
- snowflake/ml/modeling/decomposition/incremental_pca.py +28 -17
- snowflake/ml/modeling/decomposition/kernel_pca.py +28 -17
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +28 -17
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +28 -17
- snowflake/ml/modeling/decomposition/pca.py +28 -17
- snowflake/ml/modeling/decomposition/sparse_pca.py +28 -17
- snowflake/ml/modeling/decomposition/truncated_svd.py +28 -17
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +28 -17
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +28 -17
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/bagging_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/bagging_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/isolation_forest.py +28 -17
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/stacking_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/voting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/voting_regressor.py +28 -17
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fdr.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fpr.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fwe.py +28 -17
- snowflake/ml/modeling/feature_selection/select_k_best.py +28 -17
- snowflake/ml/modeling/feature_selection/select_percentile.py +28 -17
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +28 -17
- snowflake/ml/modeling/feature_selection/variance_threshold.py +28 -17
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +28 -17
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +28 -17
- snowflake/ml/modeling/impute/iterative_imputer.py +28 -17
- snowflake/ml/modeling/impute/knn_imputer.py +28 -17
- snowflake/ml/modeling/impute/missing_indicator.py +28 -17
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +28 -17
- snowflake/ml/modeling/kernel_approximation/nystroem.py +28 -17
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +28 -17
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +28 -17
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +28 -17
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +28 -17
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +28 -17
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ard_regression.py +28 -17
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +28 -17
- snowflake/ml/modeling/linear_model/elastic_net.py +28 -17
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +28 -17
- snowflake/ml/modeling/linear_model/gamma_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/huber_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/lars.py +28 -17
- snowflake/ml/modeling/linear_model/lars_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +28 -17
- snowflake/ml/modeling/linear_model/linear_regression.py +28 -17
- snowflake/ml/modeling/linear_model/logistic_regression.py +28 -17
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +28 -17
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +28 -17
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/perceptron.py +28 -17
- snowflake/ml/modeling/linear_model/poisson_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ransac_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ridge.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_cv.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +28 -17
- snowflake/ml/modeling/manifold/isomap.py +28 -17
- snowflake/ml/modeling/manifold/mds.py +28 -17
- snowflake/ml/modeling/manifold/spectral_embedding.py +28 -17
- snowflake/ml/modeling/manifold/tsne.py +28 -17
- snowflake/ml/modeling/metrics/classification.py +6 -1
- snowflake/ml/modeling/metrics/regression.py +517 -9
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +28 -17
- snowflake/ml/modeling/mixture/gaussian_mixture.py +28 -17
- snowflake/ml/modeling/model_selection/grid_search_cv.py +28 -17
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +28 -17
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +28 -17
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +28 -17
- snowflake/ml/modeling/multiclass/output_code_classifier.py +28 -17
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/complement_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +28 -17
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +28 -17
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +28 -17
- snowflake/ml/modeling/neighbors/kernel_density.py +28 -17
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +28 -17
- snowflake/ml/modeling/neighbors/nearest_centroid.py +28 -17
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +28 -17
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +28 -17
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +28 -17
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +28 -17
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +28 -17
- snowflake/ml/modeling/neural_network/mlp_classifier.py +28 -17
- snowflake/ml/modeling/neural_network/mlp_regressor.py +28 -17
- snowflake/ml/modeling/pipeline/pipeline.py +24 -0
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
- snowflake/ml/modeling/preprocessing/polynomial_features.py +28 -17
- snowflake/ml/modeling/semi_supervised/label_propagation.py +28 -17
- snowflake/ml/modeling/semi_supervised/label_spreading.py +28 -17
- snowflake/ml/modeling/svm/linear_svc.py +28 -17
- snowflake/ml/modeling/svm/linear_svr.py +28 -17
- snowflake/ml/modeling/svm/nu_svc.py +28 -17
- snowflake/ml/modeling/svm/nu_svr.py +28 -17
- snowflake/ml/modeling/svm/svc.py +28 -17
- snowflake/ml/modeling/svm/svr.py +28 -17
- snowflake/ml/modeling/tree/decision_tree_classifier.py +28 -17
- snowflake/ml/modeling/tree/decision_tree_regressor.py +28 -17
- snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -17
- snowflake/ml/modeling/tree/extra_tree_regressor.py +28 -17
- snowflake/ml/modeling/xgboost/xgb_classifier.py +28 -17
- snowflake/ml/modeling/xgboost/xgb_regressor.py +28 -17
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +28 -17
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +28 -17
- snowflake/ml/registry/model_registry.py +49 -65
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/METADATA +24 -1
- snowflake_ml_python-1.0.2.dist-info/RECORD +246 -0
- snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/WHEEL +0 -0
snowflake/ml/_internal/file_utils.py CHANGED

@@ -1,15 +1,13 @@
 import contextlib
 import hashlib
-import importlib
 import io
 import os
 import pathlib
+import pkgutil
 import shutil
 import tempfile
 import zipfile
-from typing import IO, Generator,
-
-from snowflake.snowpark import session as snowpark_session
+from typing import IO, Generator, List, Optional, Union

 GENERATED_PY_FILE_EXT = (".pyc", ".pyo", ".pyd", ".pyi")

@@ -116,19 +114,6 @@ def unzip_stream_in_temp_dir(stream: IO[bytes], temp_root: Optional[str] = None)
     yield tempdir


-@contextlib.contextmanager
-def zip_snowml() -> Generator[Tuple[io.BytesIO, str], None, None]:
-    """Zip the snowflake-ml source code as a zip-file for import.
-
-    Yields:
-        A bytes IO stream containing the zip file.
-    """
-    snowml_path = list(importlib.import_module("snowflake.ml").__path__)[0]
-    root_path = os.path.normpath(os.path.join(snowml_path, os.pardir, os.pardir))
-    with zip_file_or_directory_to_stream(snowml_path, root_path) as stream:
-        yield stream, hash_directory(snowml_path)
-
-
 def hash_directory(directory: Union[str, pathlib.Path]) -> str:
     """Hash the **content** of a folder recursively using SHA-1.

@@ -154,21 +139,9 @@ def hash_directory(directory: Union[str, pathlib.Path]) -> str:
     return _update_hash_from_dir(directory, hashlib.sha1()).hexdigest()


-def
-
-
-
-
-
-        session: Snowpark connection session.
-        stage_location: The path to the stage location where the uploaded SnowML should be. Defaults to None.
-
-    Returns:
-        The path to the uploaded SnowML zip file.
-    """
-    with zip_snowml() as (stream, hash_str):
-        if stage_location is None:
-            stage_location = session.get_session_stage()
-        file_location = os.path.join(stage_location, f"snowml_{hash_str}.zip")
-        session.file.put_stream(stream, stage_location=file_location, auto_compress=False, overwrite=False)
-        return file_location
+def get_all_modules(dirname: str, prefix: str = "") -> List[pkgutil.ModuleInfo]:
+    subdirs = [f.path for f in os.scandir(dirname) if f.is_dir()]
+    modules = list(pkgutil.iter_modules(subdirs, prefix=prefix))
+    for dirname in subdirs:
+        modules.extend(get_all_modules(dirname, prefix=f"{prefix}.{dirname}" if prefix else dirname))
+    return modules
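The new `get_all_modules` helper replaces the deleted zip-and-upload helpers with a recursive module walker built on `pkgutil`. A minimal sketch of the mechanism it builds on, using a throwaway package tree (names are illustrative, standard library only):

```python
import os
import pkgutil
import tempfile

# Build a tiny package tree: pkg/mod_a.py and pkg/sub/mod_b.py.
root = tempfile.mkdtemp()
os.makedirs(os.path.join(root, "pkg", "sub"))
for rel in ("pkg/__init__.py", "pkg/mod_a.py", "pkg/sub/__init__.py", "pkg/sub/mod_b.py"):
    open(os.path.join(root, *rel.split("/")), "w").close()

# pkgutil.iter_modules only lists modules directly inside the given paths;
# get_all_modules recurses into subdirectories to pick up nested ones too.
print([m.name for m in pkgutil.iter_modules([os.path.join(root, "pkg")])])
# ['mod_a', 'sub']
```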
snowflake/ml/_internal/utils/identifier.py CHANGED

@@ -4,14 +4,19 @@ from typing import Any, List, Optional, Tuple, Union, overload
 from snowflake.snowpark._internal.analyzer import analyzer_utils

 # Snowflake Identifier Regex. See https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html.
-
+_SF_UNQUOTED_CASE_INSENSITIVE_IDENTIFIER = "[A-Za-z_][A-Za-z0-9_$]*"
+_SF_UNQUOTED_CASE_SENSITIVE_IDENTIFIER = "[A-Z_][A-Z0-9_$]*"
 SF_QUOTED_IDENTIFIER = '"(?:[^"]|"")*"'
-_SF_IDENTIFIER = f"({
+_SF_IDENTIFIER = f"({_SF_UNQUOTED_CASE_INSENSITIVE_IDENTIFIER}|{SF_QUOTED_IDENTIFIER})"
 _SF_SCHEMA_LEVEL_OBJECT = rf"{_SF_IDENTIFIER}\.{_SF_IDENTIFIER}\.{_SF_IDENTIFIER}(.*)"
 _SF_SCHEMA_LEVEL_OBJECT_RE = re.compile(_SF_SCHEMA_LEVEL_OBJECT)

-UNQUOTED_CASE_INSENSITIVE_RE = re.compile(f"^({
+UNQUOTED_CASE_INSENSITIVE_RE = re.compile(f"^({_SF_UNQUOTED_CASE_INSENSITIVE_IDENTIFIER})$")
+UNQUOTED_CASE_SENSITIVE_RE = re.compile(f"^({_SF_UNQUOTED_CASE_SENSITIVE_IDENTIFIER})$")
 QUOTED_IDENTIFIER_RE = re.compile(f"^({SF_QUOTED_IDENTIFIER})$")
+DOUBLE_QUOTE = '"'
+
+quote_name_without_upper_casing = analyzer_utils.quote_name_without_upper_casing


 def _is_quoted(id: str) -> bool:
@@ -61,10 +66,47 @@ def _get_unescaped_name(id: str) -> str:
     if not _is_quoted(id):
         return id.upper()
     unquoted_id = id[1:-1]
-    return unquoted_id.replace(
+    return unquoted_id.replace(DOUBLE_QUOTE + DOUBLE_QUOTE, DOUBLE_QUOTE)


-
+def _get_escaped_name(id: str) -> str:
+    """Add double quotes to escape quotes.
+    Replace double quotes with double double quotes if there is existing double quotes
+
+    NOTE: See note in :meth:`_is_quoted`.
+
+    Args:
+        id: The string to be checked & treated.
+
+    Returns:
+        String with quotes would doubled; original string would add double quotes.
+    """
+    escape_quotes = id.replace(DOUBLE_QUOTE, DOUBLE_QUOTE + DOUBLE_QUOTE)
+    return DOUBLE_QUOTE + escape_quotes + DOUBLE_QUOTE
+
+
+def get_inferred_name(id: str) -> str:
+    """Double quote id when it is case-sensitive and can start with and
+    contain any valid characters; unquote otherwise.
+
+    Examples:
+        COL1 -> COL1
+        1COL -> "1COL"
+        Col -> "Col"
+        "COL" -> \"""COL""\" (ignore '\')
+        COL 1 -> "COL 1"
+
+    Args:
+        id: The string to be checked & treated.
+
+    Returns:
+        Double quoted identifier if necessary; unquoted string otherwise.
+    """
+    if UNQUOTED_CASE_SENSITIVE_RE.match(id):
+        return id
+    escaped_id = get_escaped_names(id)
+    assert isinstance(escaped_id, str)
+    return escaped_id


 def concat_names(ids: List[str]) -> str:
@@ -89,7 +131,7 @@ def concat_names(ids: List[str]) -> str:
         parts.append(id)
     final_id = "".join(parts)
     if quotes_needed:
-        return
+        return _get_escaped_name(final_id)
     return final_id


@@ -135,7 +177,7 @@ def get_unescaped_names(ids: List[str]) -> List[str]:

 def get_unescaped_names(ids: Optional[Union[str, List[str]]]) -> Optional[Union[str, List[str]]]:
     """Given a user provided identifier(s), this method will compute the equivalent column name identifier(s) in the
-    response pandas dataframe(i.e., in the
+    response pandas dataframe(i.e., in the response of snowpark_df.to_pandas()) using the rules defined here
    https://docs.snowflake.com/en/sql-reference/identifiers-syntax.

     Args:
@@ -156,3 +198,28 @@ def get_unescaped_names(ids: Optional[Union[str, List[str]]]) -> Optional[Union[
         return _get_unescaped_name(ids)
     else:
         raise ValueError("Unsupported type. Only string or list of string are supported for selecting columns.")
+
+
+def get_escaped_names(ids: Optional[Union[str, List[str]]]) -> Optional[Union[str, List[str]]]:
+    """Given a user provided identifier(s), this method will compute the equivalent column name identifier(s)
+    in case of column name contains special characters, and maintains case-sensitivity
+    https://docs.snowflake.com/en/sql-reference/identifiers-syntax.
+
+    Args:
+        ids: User provided column name identifier(s).
+
+    Returns:
+        Double-quoted Identifiers for column names, to make sure that column names are case sensitive
+
+    Raises:
+        ValueError: if input types is unsupported or column name identifiers are invalid.
+    """
+
+    if ids is None:
+        return None
+    elif type(ids) is list:
+        return [_get_escaped_name(id) for id in ids]
+    elif type(ids) is str:
+        return _get_escaped_name(ids)
+    else:
+        raise ValueError("Unsupported type. Only string or list of string are supported for selecting columns.")
snowflake/ml/model/_core_requirements.py CHANGED

@@ -1 +1 @@
-REQUIREMENTS=['anyio>=3.5.0,<4', 'cloudpickle', 'numpy>=1.23,<2', 'packaging>=20.9,<24', 'pandas>=1.0.0,<2', 'pyyaml>=6.0,<7', '
+REQUIREMENTS=['anyio>=3.5.0,<4', 'cloudpickle', 'numpy>=1.23,<2', 'packaging>=20.9,<24', 'pandas>=1.0.0,<2', 'pyyaml>=6.0,<7', 'snowflake-snowpark-python>=1.4.0,<2', 'typing-extensions>=4.1.0,<5']
snowflake/ml/model/_deploy_client/warehouse/deploy.py CHANGED

@@ -6,7 +6,7 @@ from typing import IO, List, Optional, Tuple, TypedDict, Union

 from typing_extensions import Unpack

-from snowflake.ml._internal import
+from snowflake.ml._internal import env_utils
 from snowflake.ml.model import (
     _env as model_env,
     _model,
@@ -62,11 +62,7 @@ def _deploy_to_warehouse(
     if target_method not in meta.signatures.keys():
         raise ValueError(f"Target method {target_method} does not exist in model.")

-
-
-    final_packages = _get_model_final_packages(
-        meta, session, relax_version=relax_version, _use_local_snowml=_use_local_snowml
-    )
+    final_packages = _get_model_final_packages(meta, session, relax_version=relax_version)

     stage_location = kwargs.get("permanent_udf_stage_location", None)
     if stage_location:
@@ -74,17 +70,11 @@ def _deploy_to_warehouse(
         if not stage_location.startswith("@"):
             raise ValueError(f"Invalid stage location {stage_location}.")

-    _snowml_wheel_path = None
-    if _use_local_snowml:
-        _snowml_wheel_path = file_utils.upload_snowml(session, stage_location=stage_location)
-
     with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f:
         _write_UDF_py_file(f.file, extract_model_code, target_method, **kwargs)
         print(f"Generated UDF file is persisted at: {f.name}")
-    imports = (
-
-        + ([model_stage_file_path] if model_stage_file_path else [])
-        + ([_snowml_wheel_path] if _snowml_wheel_path else [])
+    imports = ([model_dir_path] if model_dir_path else []) + (
+        [model_stage_file_path] if model_stage_file_path else []
     )

     class _UDFParams(TypedDict):
@@ -139,6 +129,7 @@ def _write_UDF_py_file(
         extract_model_code=extract_model_code,
         keep_order_code=infer_template._KEEP_ORDER_CODE_TEMPLATE if keep_order else "",
         target_method=target_method,
+        code_dir_name=_model_meta.ModelMetadata.MODEL_CODE_DIR,
     )
     f.write(udf_code)
     f.flush()
@@ -148,7 +139,6 @@ def _get_model_final_packages(
     meta: _model_meta.ModelMetadata,
     session: snowpark_session.Session,
     relax_version: Optional[bool] = False,
-    _use_local_snowml: Optional[bool] = False,
 ) -> List[str]:
     """Generate final packages list of dependency of a model to be deployed to warehouse.

@@ -157,7 +147,6 @@ def _get_model_final_packages(
        session: Snowpark connection session.
        relax_version: Whether or not relax the version restriction when fail to resolve dependencies.
            Defaults to False.
-       _use_local_snowml: Flag to indicate if using local SnowML code as execution library

    Raises:
        RuntimeError: Raised when PIP requirements and dependencies from non-Snowflake anaconda channel found.
@@ -174,16 +163,6 @@ def _get_model_final_packages(
        raise RuntimeError("PIP requirements and dependencies from non-Snowflake anaconda channel is not supported.")

    deps = meta._conda_dependencies[""]
-    if _use_local_snowml:
-        local_snowml_version = snowml_env.VERSION
-        snowml_dept = next((dep for dep in deps if dep.name == env_utils._SNOWML_PKG_NAME), None)
-        if snowml_dept:
-            if not snowml_dept.specifier.contains(local_snowml_version) and not relax_version:
-                raise RuntimeError(
-                    "Incompatible snowflake-ml-python-version is found. "
-                    + f"Require {snowml_dept.specifier}, got {local_snowml_version}."
-                )
-            deps.remove(snowml_dept)

    try:
        final_packages = env_utils.resolve_conda_environment(
snowflake/ml/model/_deploy_client/warehouse/infer_template.py CHANGED

@@ -48,10 +48,10 @@ class FileLock:
 IMPORT_DIRECTORY_NAME = "snowflake_import_directory"
 import_dir = sys._xoptions[IMPORT_DIRECTORY_NAME]

-from snowflake.ml.model import _model
-
 {extract_model_code}

+sys.path.insert(0, os.path.join(extracted_model_dir_path, "{code_dir_name}"))
+from snowflake.ml.model import _model
 model, meta = _model._load_model_for_deploy(extracted_model_dir_path)

 # TODO(halu): Wire `max_batch_size`.
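The reordering matters because `sys.path` is searched front to back. A sketch of the idea with a placeholder path (the real template interpolates `{code_dir_name}` at deploy time):

```python
import sys

# Placeholder for the directory the UDF template unzips the model into.
extracted_model_dir_path = "/tmp/extracted_model"

# Prepending (index 0) makes the copy of snowflake.ml embedded in the model's
# code directory win over any version already installed in the UDF runtime;
# the import must be issued only after this line.
sys.path.insert(0, f"{extracted_model_dir_path}/code")
# from snowflake.ml.model import _model  # now resolved against the embedded copy
```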
snowflake/ml/model/_handlers/_base.py CHANGED

@@ -1,7 +1,7 @@
 from abc import ABC, abstractmethod
 from typing import Generic, Optional

-from typing_extensions import TypeGuard
+from typing_extensions import TypeGuard, Unpack

 from snowflake.ml.model import _model_meta, type_hints as model_types

@@ -43,6 +43,7 @@ class _ModelHandler(ABC, Generic[model_types._ModelType]):
         model_blobs_dir_path: str,
         sample_input: Optional[model_types.SupportedDataType] = None,
         is_sub_model: Optional[bool] = False,
+        **kwargs: Unpack[model_types.ModelSaveOption],
     ) -> None:
         """Save the model.

@@ -53,6 +54,7 @@ class _ModelHandler(ABC, Generic[model_types._ModelType]):
             model_blobs_dir_path: Directory path to the model.
             sample_input: Sample input to infer the signatures from.
             is_sub_model: Flag to show if it is a sub model, a sub model does not need signature.
+            kwargs: Additional saving options.
         """
         ...
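`Unpack[TypedDict]` (PEP 692) lets a type checker validate `**kwargs` against a typed option set. A self-contained sketch with illustrative option names (the real set lives in `model_types.ModelSaveOption`):

```python
from typing_extensions import NotRequired, TypedDict, Unpack

class SaveOptions(TypedDict):
    # Illustrative keys, mirroring the shape of ModelSaveOption.
    embed_local_ml_library: NotRequired[bool]
    allow_overwritten_stage_file: NotRequired[bool]

def save(name: str, **kwargs: Unpack[SaveOptions]) -> None:
    # A type checker now flags save("m", bogus_option=1) as an error.
    print(name, kwargs)

save("my_model", embed_local_ml_library=True)
```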
snowflake/ml/model/_handlers/sklearn.py CHANGED

@@ -101,6 +101,7 @@ class _SKLModelHandler(_base._ModelHandler[Union["sklearn.base.BaseEstimator", "
             name=name, model_type=_SKLModelHandler.handler_type, path=_SKLModelHandler.MODEL_BLOB_FILE
         )
         model_meta.models[name] = base_meta
+        model_meta._include_if_absent([("scikit-learn", "scikit-learn")])

     @staticmethod
     def _load_model(
snowflake/ml/model/_handlers/xgboost.py CHANGED

@@ -95,7 +95,7 @@ class _XGBModelHandler(_base._ModelHandler[Union["xgboost.Booster", "xgboost.XGB
             options={"xgb_estimator_type": model.__class__.__name__},
         )
         model_meta.models[name] = base_meta
-        model_meta._include_if_absent([("xgboost", "xgboost")])
+        model_meta._include_if_absent([("scikit-learn", "scikit-learn"), ("xgboost", "xgboost")])

     @staticmethod
     def _load_model(
snowflake/ml/model/_model.py CHANGED

@@ -2,9 +2,9 @@ import os
 import tempfile
 import warnings
 from types import ModuleType
-from typing import Dict, List, Literal, Optional, Tuple, Union, overload
+from typing import TYPE_CHECKING, Dict, List, Literal, Optional, Tuple, Union, overload

-from snowflake.ml._internal import file_utils
+from snowflake.ml._internal import file_utils, type_utils
 from snowflake.ml.model import (
     _env,
     _model_handler,
@@ -13,9 +13,11 @@ from snowflake.ml.model import (
     model_signature,
     type_hints as model_types,
 )
-from snowflake.ml.modeling.framework import base
 from snowflake.snowpark import FileOperation, Session

+if TYPE_CHECKING:
+    from snowflake.ml.modeling.framework import base
+
 MODEL_BLOBS_DIR = "models"


@@ -23,7 +25,7 @@ MODEL_BLOBS_DIR = "models"
 def save_model(
     *,
     name: str,
-    model: base.BaseEstimator,
+    model: "base.BaseEstimator",
     model_dir_path: str,
     metadata: Optional[Dict[str, str]] = None,
     conda_dependencies: Optional[List[str]] = None,
@@ -135,7 +137,7 @@ def save_model(
 def save_model(
     *,
     name: str,
-    model: base.BaseEstimator,
+    model: "base.BaseEstimator",
     session: Session,
     model_stage_file_path: str,
     metadata: Optional[Dict[str, str]] = None,
@@ -322,9 +324,11 @@ def save_model(
         + f"{'None' if model_stage_file_path is None else 'specified'} at the same time."
     )

-    if (
-        (signatures is
-
+    if (
+        (signatures is None)
+        and (sample_input is None)
+        and not type_utils.LazyType("snowflake.ml.modeling.framework.base.BaseEstimator").isinstance(model)
+    ) or ((signatures is not None) and (sample_input is not None)):
         raise ValueError(
             "Signatures and sample_input both cannot be "
             + f"{'None for local model' if signatures is None else 'specified'} at the same time."
@@ -361,7 +365,7 @@ def save_model(

     assert session and model_stage_file_path
     if os.path.splitext(model_stage_file_path)[1] != ".zip":
-        raise ValueError("Provided model path in the stage {model_stage_file_path} must be a path to a zip file.")
+        raise ValueError(f"Provided model path in the stage {model_stage_file_path} must be a path to a zip file.")

     with tempfile.TemporaryDirectory() as temp_local_model_dir_path:
         meta = _save(
@@ -397,15 +401,15 @@ def _save(
     name: str,
     model: model_types.SupportedModelType,
     local_dir_path: str,
-    signatures: Optional[Dict[str, model_signature.ModelSignature]]
-    sample_input: Optional[model_types.SupportedDataType]
-    metadata: Optional[Dict[str, str]]
-    conda_dependencies: Optional[List[str]]
-    pip_requirements: Optional[List[str]]
-    python_version: Optional[str]
-    ext_modules: Optional[List[ModuleType]]
-    code_paths: Optional[List[str]]
-    options:
+    signatures: Optional[Dict[str, model_signature.ModelSignature]],
+    sample_input: Optional[model_types.SupportedDataType],
+    metadata: Optional[Dict[str, str]],
+    conda_dependencies: Optional[List[str]],
+    pip_requirements: Optional[List[str]],
+    python_version: Optional[str],
+    ext_modules: Optional[List[ModuleType]],
+    code_paths: Optional[List[str]],
+    options: model_types.ModelSaveOption,
 ) -> _model_meta.ModelMetadata:
     local_dir_path = os.path.normpath(local_dir_path)

@@ -423,6 +427,7 @@ def _save(
         conda_dependencies=conda_dependencies,
         pip_requirements=pip_requirements,
         python_version=python_version,
+        **options,
     ) as meta:
         model_blobs_path = os.path.join(local_dir_path, MODEL_BLOBS_DIR)
         os.makedirs(model_blobs_path, exist_ok=True)
@@ -539,7 +544,7 @@ def load_model(

     assert session and model_stage_file_path
     if os.path.splitext(model_stage_file_path)[1] != ".zip":
-        raise ValueError("Provided model path in the stage {model_stage_file_path} must be a path to a zip file.")
+        raise ValueError(f"Provided model path in the stage {model_stage_file_path} must be a path to a zip file.")

     fo = FileOperation(session=session)
     zf = fo.get_stream(model_stage_file_path)
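`type_utils.LazyType` is internal to the package; the point of the change is to test whether `model` is a SnowML estimator without importing the heavy `snowflake.ml.modeling` stack at module load. A rough standalone sketch of that pattern (not the package's implementation):

```python
def lazy_isinstance(obj: object, qualified_name: str) -> bool:
    # Walk the MRO and compare fully qualified class names, so the target
    # class's module never has to be imported just for the check.
    return any(
        f"{klass.__module__}.{klass.__qualname__}" == qualified_name
        for klass in type(obj).__mro__
    )

assert lazy_isinstance({}, "builtins.dict")
assert not lazy_isinstance({}, "collections.OrderedDict")
```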
snowflake/ml/model/_model_meta.py CHANGED

@@ -1,10 +1,10 @@
 import dataclasses
+import importlib
 import os
 import sys
 import warnings
 from contextlib import contextmanager
 from datetime import datetime
-from pathlib import Path
 from types import ModuleType
 from typing import Any, Callable, Dict, Generator, List, Optional, Sequence, Tuple, cast

@@ -24,8 +24,6 @@ from snowflake.snowpark import DataFrame as SnowparkDataFrame
 MODEL_METADATA_VERSION = 1
 _BASIC_DEPENDENCIES = _core_requirements.REQUIREMENTS

-_BASIC_DEPENDENCIES.append(env_utils._SNOWML_PKG_NAME)
-

 @dataclasses.dataclass
 class _ModelBlobMetadata:
@@ -84,6 +82,10 @@ def _create_model_metadata(
         A model metadata object.
     """
     model_dir_path = os.path.normpath(model_dir_path)
+    embed_local_ml_library = kwargs.pop("embed_local_ml_library", False)
+    if embed_local_ml_library:
+        snowml_path = list(importlib.import_module("snowflake.ml").__path__)[0]
+        kwargs["local_ml_library_version"] = f"{snowml_env.VERSION}+{file_utils.hash_directory(snowml_path)}"

     model_meta = ModelMetadata(
         name=name,
@@ -100,6 +102,14 @@ def _create_model_metadata(
         os.makedirs(code_dir_path, exist_ok=True)
         for code_path in code_paths:
             file_utils.copy_file_or_tree(code_path, code_dir_path)
+
+    if embed_local_ml_library:
+        code_dir_path = os.path.join(model_dir_path, ModelMetadata.MODEL_CODE_DIR)
+        snowml_path = list(importlib.import_module("snowflake.ml").__path__)[0]
+        snowml_path_in_code = os.path.join(code_dir_path, "snowflake")
+        os.makedirs(snowml_path_in_code, exist_ok=True)
+        file_utils.copy_file_or_tree(snowml_path, snowml_path_in_code)
+
     try:
         imported_modules = []
         if ext_modules:
@@ -117,8 +127,7 @@ def _create_model_metadata(

 def _load_model_metadata(model_dir_path: str) -> "ModelMetadata":
     """Load models for a directory. Model is initially loaded normally. If additional codes are included when packed,
-    the code path is added to system path to be imported
-    been imported.
+    the code path is added to system path to be imported with highest priority.

     Args:
         model_dir_path: Path to the directory containing the model to be loaded.
@@ -131,14 +140,12 @@ def _load_model_metadata(model_dir_path: str) -> "ModelMetadata":
     meta = ModelMetadata.load_model_metadata(model_dir_path)
     code_path = os.path.join(model_dir_path, ModelMetadata.MODEL_CODE_DIR)
     if os.path.exists(code_path):
-
-
-
-
-            if p.is_file() and p.name != "__init__.py" and p.name != "__main__.py"
-        ]
+        if code_path in sys.path:
+            sys.path.remove(code_path)
+        sys.path.insert(0, code_path)
+        modules = file_utils.get_all_modules(code_path)
         for module in modules:
-            sys.modules.pop(module, None)
+            sys.modules.pop(module.name, None)
     return meta


@@ -206,8 +213,10 @@ class ModelMetadata:
         self._pip_requirements = env_utils.validate_pip_requirement_string_list(
             pip_requirements if pip_requirements else []
         )
-
-
+        if "local_ml_library_version" in kwargs:
+            self._include_if_absent([(dep, dep) for dep in _BASIC_DEPENDENCIES])
+        else:
+            self._include_if_absent([(dep, dep) for dep in _BASIC_DEPENDENCIES + [env_utils._SNOWML_PKG_NAME]])

         self.__dict__.update(kwargs)

@@ -344,7 +353,7 @@ class ModelMetadata:
         with open(model_yaml_path) as f:
             loaded_mata = yaml.safe_load(f.read())

-        loaded_mata_version = loaded_mata.
+        loaded_mata_version = loaded_mata.pop("version", None)
         if not loaded_mata_version or loaded_mata_version != MODEL_METADATA_VERSION:
             raise NotImplementedError("Unknown or unsupported model metadata file found.")
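The loader change pairs `sys.path.insert(0, ...)` with a purge of `sys.modules`: modules already imported from the installed copy would otherwise keep shadowing the embedded one. A sketch of the purge step (module names illustrative):

```python
import sys

def purge_modules(module_names):
    # Dropping a name from sys.modules forces the next import to re-resolve
    # it against the current sys.path order; missing names are ignored.
    for name in module_names:
        sys.modules.pop(name, None)

purge_modules(["snowflake.ml", "snowflake.ml.version"])
```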
snowflake/ml/model/type_hints.py CHANGED

@@ -4,8 +4,6 @@ from typing import TYPE_CHECKING, Sequence, TypedDict, TypeVar, Union
 import numpy.typing as npt
 from typing_extensions import NotRequired, TypeAlias

-from snowflake.ml.modeling.framework import base
-
 if TYPE_CHECKING:
     import numpy as np
     import pandas as pd
@@ -15,6 +13,7 @@ if TYPE_CHECKING:

     import snowflake.ml.model.custom_model
     import snowflake.snowpark
+    from snowflake.ml.modeling.framework import base  # noqa: F401


 _SupportedBuiltins = Union[int, float, bool, str, bytes, "_SupportedBuiltinsList"]
@@ -54,7 +53,7 @@ SupportedLocalModelType = Union[
     "xgboost.Booster",
 ]

-SupportedSnowMLModelType: TypeAlias = base.BaseEstimator
+SupportedSnowMLModelType: TypeAlias = "base.BaseEstimator"

 SupportedModelType = Union[
     SupportedLocalModelType,
@@ -84,15 +83,8 @@ class DeployOptions(TypedDict):
        Defaults to False.
    keep_order: Whether or not preserve the row order when predicting. Only available for dataframe has fewer than 2**64
        rows. Defaults to True.
-
-    Internal-only options
-    _use_local_snowml: Use local SnowML when as the execution library of the deployment. If set to True, local SnowML
-        would be packed and uploaded to 1) session stage, if it is a temporary deployment, or 2) the provided stage path
-        if it is a permanent deployment. It should be set to True before SnowML available in Snowflake Anaconda Channel.
-        Default to False.
     """

-    _use_local_snowml: NotRequired[bool]
     output_with_input_features: NotRequired[bool]
     keep_order: NotRequired[bool]

@@ -115,14 +107,16 @@ class WarehouseDeployOptions(DeployOptions):
 class ModelSaveOption(TypedDict):
     """Options for saving the model.

+    embed_local_ml_library: Embedding local SnowML into the code directory of the folder.
     allow_overwritten_stage_file: Flag to indicate when saving the model as a stage file, whether overwriting existed
         file is allowed. Default to False.
     """

+    embed_local_ml_library: NotRequired[bool]
     allow_overwritten_stage_file: NotRequired[bool]


-class CustomModelSaveOption(
+class CustomModelSaveOption(ModelSaveOption):
     ...
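Because both keys are `NotRequired`, any subset is a valid `ModelSaveOption`. A usage sketch (assumes snowflake-ml-python 1.0.2 is installed):

```python
from snowflake.ml.model import type_hints as model_types

options: model_types.ModelSaveOption = {
    "embed_local_ml_library": True,         # pack the local SnowML copy with the model
    "allow_overwritten_stage_file": False,
}
```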
snowflake/ml/modeling/calibration/calibrated_classifier_cv.py CHANGED

@@ -682,26 +682,37 @@ class CalibratedClassifierCV(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
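The matching logic above (repeated across all the modeling wrappers listed earlier) accepts each fitted feature name in any of three spellings. A toy pandas illustration; the strip/quote helpers below are crude stand-ins for `get_unescaped_names`/`get_escaped_names`:

```python
import pandas as pd

dataset = pd.DataFrame({"COL1": [1], '"Col 2"': [2]})
input_cols = ["COL1", '"Col 2"']
unquoted = [c.strip('"') for c in input_cols]  # stand-in for get_unescaped_names
quoted = [f'"{u}"' for u in unquoted]          # stand-in for get_escaped_names

# For each position, take whichever spelling is actually present in the data.
columns_to_select = []
for i in range(len(input_cols)):
    for candidate in (input_cols[i], unquoted[i], quoted[i]):
        if candidate in dataset.columns:
            columns_to_select.append(candidate)
            break

print(columns_to_select)  # ['COL1', '"Col 2"']
```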
|