PyPI - snowflake-ml-python - Versions diffs - 1.6.1__py3-none-any.whl → 1.6.3__py3-none-any.whl - Mend

snowflake-ml-python 1.6.1 (py3-none-any.whl) → 1.6.3 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (284)

snowflake/cortex/__init__.py +4 -0
snowflake/cortex/_classify_text.py +2 -2
snowflake/cortex/_embed_text_1024.py +37 -0
snowflake/cortex/_embed_text_768.py +37 -0
snowflake/cortex/_extract_answer.py +2 -2
snowflake/cortex/_sentiment.py +2 -2
snowflake/cortex/_summarize.py +2 -2
snowflake/cortex/_translate.py +2 -2
snowflake/cortex/_util.py +4 -4
snowflake/ml/_internal/env_utils.py +5 -5
snowflake/ml/_internal/exceptions/error_codes.py +2 -0
snowflake/ml/_internal/telemetry.py +142 -20
snowflake/ml/_internal/utils/db_utils.py +50 -0
snowflake/ml/_internal/utils/identifier.py +48 -11
snowflake/ml/_internal/utils/service_logger.py +63 -0
snowflake/ml/_internal/utils/snowflake_env.py +23 -13
snowflake/ml/_internal/utils/sql_identifier.py +26 -2
snowflake/ml/_internal/utils/table_manager.py +19 -1
snowflake/ml/data/_internal/arrow_ingestor.py +1 -11
snowflake/ml/data/data_connector.py +33 -7
snowflake/ml/data/ingestor_utils.py +20 -10
snowflake/ml/data/torch_utils.py +68 -0
snowflake/ml/dataset/dataset.py +1 -3
snowflake/ml/feature_store/access_manager.py +3 -3
snowflake/ml/feature_store/feature_store.py +60 -19
snowflake/ml/feature_store/feature_view.py +84 -30
snowflake/ml/fileset/embedded_stage_fs.py +1 -1
snowflake/ml/fileset/fileset.py +1 -1
snowflake/ml/fileset/sfcfs.py +9 -3
snowflake/ml/fileset/stage_fs.py +2 -1
snowflake/ml/lineage/lineage_node.py +7 -2
snowflake/ml/model/__init__.py +1 -2
snowflake/ml/model/_client/model/model_version_impl.py +96 -12
snowflake/ml/model/_client/ops/model_ops.py +124 -6
snowflake/ml/model/_client/ops/service_ops.py +309 -9
snowflake/ml/model/_client/service/model_deployment_spec.py +8 -5
snowflake/ml/model/_client/service/model_deployment_spec_schema.py +2 -2
snowflake/ml/model/_client/sql/_base.py +5 -0
snowflake/ml/model/_client/sql/model.py +1 -0
snowflake/ml/model/_client/sql/model_version.py +9 -5
snowflake/ml/model/_client/sql/service.py +121 -20
snowflake/ml/model/_model_composer/model_composer.py +11 -39
snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +31 -11
snowflake/ml/model/_packager/model_env/model_env.py +4 -38
snowflake/ml/model/_packager/model_handlers/_utils.py +134 -28
snowflake/ml/model/_packager/model_handlers/catboost.py +31 -30
snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +26 -18
snowflake/ml/model/_packager/model_handlers/lightgbm.py +31 -58
snowflake/ml/model/_packager/model_handlers/mlflow.py +3 -5
snowflake/ml/model/_packager/model_handlers/model_objective_utils.py +169 -0
snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +15 -8
snowflake/ml/model/_packager/model_handlers/sklearn.py +56 -60
snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +141 -9
snowflake/ml/model/_packager/model_handlers/torchscript.py +2 -2
snowflake/ml/model/_packager/model_handlers/xgboost.py +63 -48
snowflake/ml/model/_packager/model_meta/model_meta.py +16 -42
snowflake/ml/model/_packager/model_meta/model_meta_schema.py +1 -14
snowflake/ml/model/_packager/model_packager.py +14 -8
snowflake/ml/model/_packager/model_runtime/model_runtime.py +11 -0
snowflake/ml/model/_signatures/pytorch_handler.py +1 -1
snowflake/ml/model/_signatures/snowpark_handler.py +3 -2
snowflake/ml/model/_signatures/utils.py +9 -0
snowflake/ml/model/type_hints.py +12 -145
snowflake/ml/modeling/_internal/constants.py +1 -0
snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +5 -5
snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +9 -6
snowflake/ml/modeling/_internal/model_specifications.py +2 -0
snowflake/ml/modeling/_internal/model_trainer.py +1 -0
snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +2 -4
snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +5 -5
snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +130 -166
snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +0 -1
snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +61 -21
snowflake/ml/modeling/cluster/affinity_propagation.py +61 -21
snowflake/ml/modeling/cluster/agglomerative_clustering.py +61 -21
snowflake/ml/modeling/cluster/birch.py +61 -21
snowflake/ml/modeling/cluster/bisecting_k_means.py +61 -21
snowflake/ml/modeling/cluster/dbscan.py +61 -21
snowflake/ml/modeling/cluster/feature_agglomeration.py +61 -21
snowflake/ml/modeling/cluster/k_means.py +61 -21
snowflake/ml/modeling/cluster/mean_shift.py +61 -21
snowflake/ml/modeling/cluster/mini_batch_k_means.py +61 -21
snowflake/ml/modeling/cluster/optics.py +61 -21
snowflake/ml/modeling/cluster/spectral_biclustering.py +61 -21
snowflake/ml/modeling/cluster/spectral_clustering.py +61 -21
snowflake/ml/modeling/cluster/spectral_coclustering.py +61 -21
snowflake/ml/modeling/compose/column_transformer.py +61 -21
snowflake/ml/modeling/compose/transformed_target_regressor.py +61 -21
snowflake/ml/modeling/covariance/elliptic_envelope.py +61 -21
snowflake/ml/modeling/covariance/empirical_covariance.py +61 -21
snowflake/ml/modeling/covariance/graphical_lasso.py +61 -21
snowflake/ml/modeling/covariance/graphical_lasso_cv.py +61 -21
snowflake/ml/modeling/covariance/ledoit_wolf.py +61 -21
snowflake/ml/modeling/covariance/min_cov_det.py +61 -21
snowflake/ml/modeling/covariance/oas.py +61 -21
snowflake/ml/modeling/covariance/shrunk_covariance.py +61 -21
snowflake/ml/modeling/decomposition/dictionary_learning.py +61 -21
snowflake/ml/modeling/decomposition/factor_analysis.py +61 -21
snowflake/ml/modeling/decomposition/fast_ica.py +61 -21
snowflake/ml/modeling/decomposition/incremental_pca.py +61 -21
snowflake/ml/modeling/decomposition/kernel_pca.py +61 -21
snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +61 -21
snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +61 -21
snowflake/ml/modeling/decomposition/pca.py +61 -21
snowflake/ml/modeling/decomposition/sparse_pca.py +61 -21
snowflake/ml/modeling/decomposition/truncated_svd.py +61 -21
snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +61 -21
snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +61 -21
snowflake/ml/modeling/ensemble/ada_boost_classifier.py +61 -21
snowflake/ml/modeling/ensemble/ada_boost_regressor.py +61 -21
snowflake/ml/modeling/ensemble/bagging_classifier.py +61 -21
snowflake/ml/modeling/ensemble/bagging_regressor.py +61 -21
snowflake/ml/modeling/ensemble/extra_trees_classifier.py +61 -21
snowflake/ml/modeling/ensemble/extra_trees_regressor.py +61 -21
snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +61 -21
snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +61 -21
snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +61 -21
snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +61 -21
snowflake/ml/modeling/ensemble/isolation_forest.py +61 -21
snowflake/ml/modeling/ensemble/random_forest_classifier.py +61 -21
snowflake/ml/modeling/ensemble/random_forest_regressor.py +61 -21
snowflake/ml/modeling/ensemble/stacking_regressor.py +61 -21
snowflake/ml/modeling/ensemble/voting_classifier.py +61 -21
snowflake/ml/modeling/ensemble/voting_regressor.py +61 -21
snowflake/ml/modeling/feature_selection/generic_univariate_select.py +61 -21
snowflake/ml/modeling/feature_selection/select_fdr.py +61 -21
snowflake/ml/modeling/feature_selection/select_fpr.py +61 -21
snowflake/ml/modeling/feature_selection/select_fwe.py +61 -21
snowflake/ml/modeling/feature_selection/select_k_best.py +61 -21
snowflake/ml/modeling/feature_selection/select_percentile.py +61 -21
snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +61 -21
snowflake/ml/modeling/feature_selection/variance_threshold.py +61 -21
snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +61 -21
snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +61 -21
snowflake/ml/modeling/impute/iterative_imputer.py +61 -21
snowflake/ml/modeling/impute/knn_imputer.py +61 -21
snowflake/ml/modeling/impute/missing_indicator.py +61 -21
snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +61 -21
snowflake/ml/modeling/kernel_approximation/nystroem.py +61 -21
snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +61 -21
snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +61 -21
snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +61 -21
snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +61 -21
snowflake/ml/modeling/lightgbm/lgbm_classifier.py +61 -21
snowflake/ml/modeling/lightgbm/lgbm_regressor.py +61 -21
snowflake/ml/modeling/linear_model/ard_regression.py +61 -21
snowflake/ml/modeling/linear_model/bayesian_ridge.py +61 -21
snowflake/ml/modeling/linear_model/elastic_net.py +61 -21
snowflake/ml/modeling/linear_model/elastic_net_cv.py +61 -21
snowflake/ml/modeling/linear_model/gamma_regressor.py +61 -21
snowflake/ml/modeling/linear_model/huber_regressor.py +61 -21
snowflake/ml/modeling/linear_model/lars.py +61 -21
snowflake/ml/modeling/linear_model/lars_cv.py +61 -21
snowflake/ml/modeling/linear_model/lasso.py +61 -21
snowflake/ml/modeling/linear_model/lasso_cv.py +61 -21
snowflake/ml/modeling/linear_model/lasso_lars.py +61 -21
snowflake/ml/modeling/linear_model/lasso_lars_cv.py +61 -21
snowflake/ml/modeling/linear_model/lasso_lars_ic.py +61 -21
snowflake/ml/modeling/linear_model/linear_regression.py +61 -21
snowflake/ml/modeling/linear_model/logistic_regression.py +61 -21
snowflake/ml/modeling/linear_model/logistic_regression_cv.py +61 -21
snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +61 -21
snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +61 -21
snowflake/ml/modeling/linear_model/multi_task_lasso.py +61 -21
snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +61 -21
snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +61 -21
snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +61 -21
snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +61 -21
snowflake/ml/modeling/linear_model/perceptron.py +61 -21
snowflake/ml/modeling/linear_model/poisson_regressor.py +61 -21
snowflake/ml/modeling/linear_model/ransac_regressor.py +61 -21
snowflake/ml/modeling/linear_model/ridge.py +61 -21
snowflake/ml/modeling/linear_model/ridge_classifier.py +61 -21
snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +61 -21
snowflake/ml/modeling/linear_model/ridge_cv.py +61 -21
snowflake/ml/modeling/linear_model/sgd_classifier.py +61 -21
snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +61 -21
snowflake/ml/modeling/linear_model/sgd_regressor.py +61 -21
snowflake/ml/modeling/linear_model/theil_sen_regressor.py +61 -21
snowflake/ml/modeling/linear_model/tweedie_regressor.py +61 -21
snowflake/ml/modeling/manifold/isomap.py +61 -21
snowflake/ml/modeling/manifold/mds.py +61 -21
snowflake/ml/modeling/manifold/spectral_embedding.py +61 -21
snowflake/ml/modeling/manifold/tsne.py +61 -21
snowflake/ml/modeling/metrics/metrics_utils.py +2 -2
snowflake/ml/modeling/metrics/ranking.py +0 -3
snowflake/ml/modeling/metrics/regression.py +0 -3
snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +61 -21
snowflake/ml/modeling/mixture/gaussian_mixture.py +61 -21
snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +61 -21
snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +61 -21
snowflake/ml/modeling/multiclass/output_code_classifier.py +61 -21
snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +61 -21
snowflake/ml/modeling/naive_bayes/categorical_nb.py +61 -21
snowflake/ml/modeling/naive_bayes/complement_nb.py +61 -21
snowflake/ml/modeling/naive_bayes/gaussian_nb.py +61 -21
snowflake/ml/modeling/naive_bayes/multinomial_nb.py +61 -21
snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +61 -21
snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +61 -21
snowflake/ml/modeling/neighbors/kernel_density.py +61 -21
snowflake/ml/modeling/neighbors/local_outlier_factor.py +61 -21
snowflake/ml/modeling/neighbors/nearest_centroid.py +61 -21
snowflake/ml/modeling/neighbors/nearest_neighbors.py +61 -21
snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +61 -21
snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +61 -21
snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +61 -21
snowflake/ml/modeling/neural_network/bernoulli_rbm.py +61 -21
snowflake/ml/modeling/neural_network/mlp_classifier.py +61 -21
snowflake/ml/modeling/neural_network/mlp_regressor.py +61 -21
snowflake/ml/modeling/parameters/disable_model_tracer.py +5 -0
snowflake/ml/modeling/pipeline/pipeline.py +1 -13
snowflake/ml/modeling/preprocessing/polynomial_features.py +61 -21
snowflake/ml/modeling/semi_supervised/label_propagation.py +61 -21
snowflake/ml/modeling/semi_supervised/label_spreading.py +61 -21
snowflake/ml/modeling/svm/linear_svc.py +61 -21
snowflake/ml/modeling/svm/linear_svr.py +61 -21
snowflake/ml/modeling/svm/nu_svc.py +61 -21
snowflake/ml/modeling/svm/nu_svr.py +61 -21
snowflake/ml/modeling/svm/svc.py +61 -21
snowflake/ml/modeling/svm/svr.py +61 -21
snowflake/ml/modeling/tree/decision_tree_classifier.py +61 -21
snowflake/ml/modeling/tree/decision_tree_regressor.py +61 -21
snowflake/ml/modeling/tree/extra_tree_classifier.py +61 -21
snowflake/ml/modeling/tree/extra_tree_regressor.py +61 -21
snowflake/ml/modeling/xgboost/xgb_classifier.py +64 -23
snowflake/ml/modeling/xgboost/xgb_regressor.py +64 -23
snowflake/ml/modeling/xgboost/xgbrf_classifier.py +64 -23
snowflake/ml/modeling/xgboost/xgbrf_regressor.py +64 -23
snowflake/ml/monitoring/_client/model_monitor.py +126 -0
snowflake/ml/monitoring/_client/model_monitor_manager.py +361 -0
snowflake/ml/monitoring/_client/model_monitor_version.py +1 -0
snowflake/ml/monitoring/_client/monitor_sql_client.py +1335 -0
snowflake/ml/monitoring/_client/queries/record_count.ssql +14 -0
snowflake/ml/monitoring/_client/queries/rmse.ssql +28 -0
snowflake/ml/monitoring/entities/model_monitor_config.py +28 -0
snowflake/ml/monitoring/entities/model_monitor_interval.py +46 -0
snowflake/ml/monitoring/entities/output_score_type.py +90 -0
snowflake/ml/registry/_manager/model_manager.py +4 -0
snowflake/ml/registry/registry.py +166 -8
snowflake/ml/version.py +1 -1
{snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.3.dist-info}/METADATA +43 -9
snowflake_ml_python-1.6.3.dist-info/RECORD +400 -0
{snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.3.dist-info}/WHEEL +1 -1
snowflake/ml/_internal/container_services/image_registry/credential.py +0 -84
snowflake/ml/_internal/container_services/image_registry/http_client.py +0 -127
snowflake/ml/_internal/container_services/image_registry/imagelib.py +0 -400
snowflake/ml/_internal/container_services/image_registry/registry_client.py +0 -212
snowflake/ml/_internal/utils/log_stream_processor.py +0 -30
snowflake/ml/_internal/utils/session_token_manager.py +0 -46
snowflake/ml/_internal/utils/spcs_attribution_utils.py +0 -122
snowflake/ml/_internal/utils/uri.py +0 -77
snowflake/ml/data/torch_dataset.py +0 -33
snowflake/ml/model/_api.py +0 -568
snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +0 -12
snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +0 -249
snowflake/ml/model/_deploy_client/image_builds/docker_context.py +0 -130
snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +0 -36
snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +0 -268
snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +0 -215
snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +0 -53
snowflake/ml/model/_deploy_client/image_builds/templates/image_build_job_spec_template +0 -38
snowflake/ml/model/_deploy_client/image_builds/templates/kaniko_shell_script_template +0 -105
snowflake/ml/model/_deploy_client/snowservice/deploy.py +0 -611
snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +0 -116
snowflake/ml/model/_deploy_client/snowservice/instance_types.py +0 -10
snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +0 -28
snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template_with_model +0 -21
snowflake/ml/model/_deploy_client/utils/constants.py +0 -48
snowflake/ml/model/_deploy_client/utils/snowservice_client.py +0 -280
snowflake/ml/model/_deploy_client/warehouse/deploy.py +0 -202
snowflake/ml/model/_deploy_client/warehouse/infer_template.py +0 -99
snowflake/ml/model/_packager/model_handlers/llm.py +0 -267
snowflake/ml/model/_packager/model_meta/_core_requirements.py +0 -11
snowflake/ml/model/deploy_platforms.py +0 -6
snowflake/ml/model/models/llm.py +0 -104
snowflake/ml/monitoring/monitor.py +0 -203
snowflake/ml/registry/_initial_schema.py +0 -142
snowflake/ml/registry/_schema.py +0 -82
snowflake/ml/registry/_schema_upgrade_plans.py +0 -116
snowflake/ml/registry/_schema_version_manager.py +0 -163
snowflake/ml/registry/model_registry.py +0 -2048
snowflake_ml_python-1.6.1.dist-info/RECORD +0 -422
{snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.3.dist-info}/LICENSE.txt +0 -0
{snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.3.dist-info}/top_level.txt +0 -0

snowflake/ml/_internal/utils/service_logger.py ADDED Viewed

@@ -0,0 +1,63 @@
+import enum
+import logging
+import sys
+class LogColor(enum.Enum):
+    GREY = "\x1b[38;20m"
+    RED = "\x1b[31;20m"
+    BOLD_RED = "\x1b[31;1m"
+    YELLOW = "\x1b[33;20m"
+    BLUE = "\x1b[34;20m"
+    GREEN = "\x1b[32;20m"
+class CustomFormatter(logging.Formatter):
+    reset = "\x1b[0m"
+    log_format = "%(name)s [%(asctime)s] [%(levelname)s] %(message)s"
+    def __init__(self, info_color: LogColor) -> None:
+        super().__init__()
+        self.level_colors = {
+            logging.DEBUG: LogColor.GREY.value,
+            logging.INFO: info_color.value,
+            logging.WARNING: LogColor.YELLOW.value,
+            logging.ERROR: LogColor.RED.value,
+            logging.CRITICAL: LogColor.BOLD_RED.value,
+        }
+    def format(self, record: logging.LogRecord) -> str:
+        # default to DEBUG color
+        fmt = self.level_colors.get(record.levelno, self.level_colors[logging.DEBUG]) + self.log_format + self.reset
+        formatter = logging.Formatter(fmt)
+        # split the log message by lines and format each line individually
+        original_message = record.getMessage()
+        message_lines = original_message.splitlines()
+        formatted_lines = [
+            formatter.format(
+                logging.LogRecord(
+                    name=record.name,
+                    level=record.levelno,
+                    pathname=record.pathname,
+                    lineno=record.lineno,
+                    msg=line,
+                    args=None,
+                    exc_info=None,
+                )
+            )
+            for line in message_lines
+        ]
+        return "\n".join(formatted_lines)
+def get_logger(logger_name: str, info_color: LogColor) -> logging.Logger:
+    logger = logging.getLogger(logger_name)
+    logger.setLevel(logging.INFO)
+    handler = logging.StreamHandler(sys.stdout)
+    handler.setLevel(logging.INFO)
+    handler.setFormatter(CustomFormatter(info_color))
+    logger.addHandler(handler)
+    return logger

snowflake/ml/_internal/utils/snowflake_env.py CHANGED Viewed

@@ -2,7 +2,7 @@ import enum
 from typing import Any, Dict, Optional, TypedDict, cast
 from packaging import version
-from typing_extensions import Required
+from typing_extensions import NotRequired, Required
 from snowflake.ml._internal.utils import query_result_checker
 from snowflake.snowpark import session
@@ -52,7 +52,7 @@ class SnowflakeCloudType(enum.Enum):
 class SnowflakeRegion(TypedDict):
-    region_group: Required[str]
+    region_group: NotRequired[str]
     snowflake_region: Required[str]
     cloud: Required[SnowflakeCloudType]
     region: Required[str]
@@ -64,23 +64,33 @@ def get_regions(
 ) -> Dict[str, SnowflakeRegion]:
     res = (
         query_result_checker.SqlResultValidator(sess, "SHOW REGIONS", statement_params=statement_params)
-        .has_column("region_group")
         .has_column("snowflake_region")
         .has_column("cloud")
         .has_column("region")
         .has_column("display_name")
         .validate()
     )
-    return {
-        f"{r.region_group}.{r.snowflake_region}": SnowflakeRegion(
-            region_group=r.region_group,
-            snowflake_region=r.snowflake_region,
-            cloud=SnowflakeCloudType.from_value(r.cloud),
-            region=r.region,
-            display_name=r.display_name,
-        )
-        for r in res
-    }
+    res_dict = {}
+    for r in res:
+        if hasattr(r, "region_group") and r.region_group:
+            key = f"{r.region_group}.{r.snowflake_region}"
+            res_dict[key] = SnowflakeRegion(
+                region_group=r.region_group,
+                snowflake_region=r.snowflake_region,
+                cloud=SnowflakeCloudType.from_value(r.cloud),
+                region=r.region,
+                display_name=r.display_name,
+            )
+        else:
+            key = r.snowflake_region
+            res_dict[key] = SnowflakeRegion(
+                snowflake_region=r.snowflake_region,
+                cloud=SnowflakeCloudType.from_value(r.cloud),
+                region=r.region,
+                display_name=r.display_name,
+            )
+    return res_dict
 def get_current_region_id(sess: session.Session, *, statement_params: Optional[Dict[str, Any]] = None) -> str:

snowflake/ml/_internal/utils/sql_identifier.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import List, Optional, Tuple
+from typing import List, Optional, Tuple, Union
 from snowflake.ml._internal.utils import identifier
@@ -84,7 +84,7 @@ def to_sql_identifiers(list_of_str: List[str], *, case_sensitive: bool = False)
 def parse_fully_qualified_name(
     name: str,
 ) -> Tuple[Optional[SqlIdentifier], Optional[SqlIdentifier], SqlIdentifier]:
-    db, schema, object, _ = identifier.parse_schema_level_object_identifier(name)
+    db, schema, object = identifier.parse_schema_level_object_identifier(name)
     assert name is not None, f"Unable parse the input name `{name}` as fully qualified."
     return (
@@ -92,3 +92,27 @@ def parse_fully_qualified_name(
         SqlIdentifier(schema) if schema else None,
         SqlIdentifier(object),
     )
+def get_fully_qualified_name(
+    db: Union[SqlIdentifier, str, None],
+    schema: Union[SqlIdentifier, str, None],
+    object: Union[SqlIdentifier, str],
+    session_db: Optional[str] = None,
+    session_schema: Optional[str] = None,
+) -> str:
+    db_name: Optional[SqlIdentifier] = None
+    schema_name: Optional[SqlIdentifier] = None
+    if not db and session_db:
+        db_name = SqlIdentifier(session_db)
+    elif isinstance(db, str):
+        db_name = SqlIdentifier(db)
+    if not schema and session_schema:
+        schema_name = SqlIdentifier(session_schema)
+    elif isinstance(schema, str):
+        schema_name = SqlIdentifier(schema)
+    return identifier.get_schema_level_object_identifier(
+        db=db_name.identifier() if db_name else None,
+        schema=schema_name.identifier() if schema_name else None,
+        object_name=object.identifier() if isinstance(object, SqlIdentifier) else SqlIdentifier(object).identifier(),
+    )

snowflake/ml/_internal/utils/table_manager.py CHANGED Viewed

@@ -1,7 +1,8 @@
 from typing import Any, Dict, List, Optional, Tuple
 from snowflake import snowpark
-from snowflake.ml._internal.utils import formatting, query_result_checker
+from snowflake.ml._internal.utils import formatting, identifier, query_result_checker
+from snowflake.snowpark import types
 """Table_manager is a set of utils that helps create tables.
@@ -104,3 +105,20 @@ def get_table_schema(session: snowpark.Session, table_name: str, qualified_schem
     for row in result:
         schema_dict[row["name"]] = row["type"]
     return schema_dict
+def get_table_schema_types(
+    session: snowpark.Session,
+    database: str,
+    schema: str,
+    table_name: str,
+) -> Dict[str, types.DataType]:
+    fully_qualified_table_name = identifier.get_schema_level_object_identifier(
+        db=database, schema=schema, object_name=table_name
+    )
+    struct_fields: List[types.StructField] = session.table(fully_qualified_table_name).schema.fields
+    schema_dict: Dict[str, types.DataType] = {}
+    for field in struct_fields:
+        schema_dict[field.name] = field.datatype
+    return schema_dict

snowflake/ml/data/_internal/arrow_ingestor.py CHANGED Viewed

@@ -11,7 +11,6 @@ import pyarrow as pa
 import pyarrow.dataset as pds
 from snowflake import snowpark
-from snowflake.connector import result_batch
 from snowflake.ml.data import data_ingestor, data_source, ingestor_utils
 _EMPTY_RECORD_BATCH = pa.RecordBatch.from_arrays([], [])
@@ -140,16 +139,7 @@ class ArrowIngestor(data_ingestor.DataIngestor):
                 #        We may be able to optimize this by splitting the result batches into
                 #        in-memory (first batch) and file URLs (subsequent batches) and creating a
                 #        union dataset.
-                result_batches = ingestor_utils.get_dataframe_result_batches(self._session, source)
-                sources.extend(
-                    b.to_arrow(self._session.connection)
-                    if isinstance(b, result_batch.ArrowResultBatch)
-                    else b.to_arrow()
-                    for b in result_batches
-                )
-                # HACK: Mitigate typing inconsistencies in Snowpark results
-                if len(sources) > 0:
-                    sources = [_cast_if_needed(s, sources[-1].schema) for s in sources]
+                sources.append(_cast_if_needed(ingestor_utils.get_dataframe_arrow_table(self._session, source)))
                 source_format = None  # Arrow Dataset expects "None" for in-memory datasets
             else:
                 raise RuntimeError(f"Unsupported data source type: {type(source)}")

snowflake/ml/data/data_connector.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import os
 from typing import TYPE_CHECKING, Any, Dict, Generator, List, Optional, Type, TypeVar
 import numpy.typing as npt
@@ -7,6 +8,10 @@ from snowflake import snowpark
 from snowflake.ml._internal import telemetry
 from snowflake.ml.data import data_ingestor, data_source
 from snowflake.ml.data._internal.arrow_ingestor import ArrowIngestor
+from snowflake.ml.modeling._internal.constants import (
+    IN_ML_RUNTIME_ENV_VAR,
+    USE_OPTIMIZED_DATA_INGESTOR,
+)
 if TYPE_CHECKING:
     import pandas as pd
@@ -142,32 +147,41 @@ class DataConnector:
         Returns:
             A Pytorch iterable datapipe that yield data.
         """
-        from torch.utils.data.datapipes import iter as torch_iter
+        from snowflake.ml.data import torch_utils
-        return torch_iter.IterableWrapper(  # type: ignore[no-untyped-call]
-            self._ingestor.to_batches(batch_size, shuffle, drop_last_batch)
+        return torch_utils.TorchDataPipeWrapper(
+            self._ingestor, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last_batch
         )
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
         subproject_extractor=lambda self: type(self).__name__,
-        func_params_to_log=["shuffle"],
+        func_params_to_log=["batch_size", "shuffle", "drop_last_batch"],
     )
-    def to_torch_dataset(self, *, shuffle: bool = False) -> "torch_data.IterableDataset":  # type: ignore[type-arg]
+    def to_torch_dataset(
+        self, *, batch_size: int = 1, shuffle: bool = False, drop_last_batch: bool = True
+    ) -> "torch_data.IterableDataset":  # type: ignore[type-arg]
         """Transform the Snowflake data into a PyTorch Iterable Dataset to be used with a DataLoader.
         Return a PyTorch Dataset which iterates on rows of data.
         Args:
+            batch_size: It specifies the size of each data batch which will be yielded in the result dataset.
+                Batching is pushed down to data ingestion level which may be more performant than DataLoader
+                batching.
             shuffle: It specifies whether the data will be shuffled. If True, files will be shuffled, and
                 rows in each file will also be shuffled.
+            drop_last_batch: Whether the last batch of data should be dropped. If set to be true,
+                then the last batch will get dropped if its size is smaller than the given batch_size.
         Returns:
             A PyTorch Iterable Dataset that yields data.
         """
-        from snowflake.ml.data import torch_dataset
+        from snowflake.ml.data import torch_utils
-        return torch_dataset.TorchDataset(self._ingestor, shuffle)
+        return torch_utils.TorchDatasetWrapper(
+            self._ingestor, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last_batch
+        )
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -184,3 +198,15 @@ class DataConnector:
             A Pandas DataFrame.
         """
         return self._ingestor.to_pandas(limit)
+# Switch to use Runtime's Data Ingester if running in ML runtime
+# Fail silently if the data ingester is not found
+if os.getenv(IN_ML_RUNTIME_ENV_VAR) and os.getenv(USE_OPTIMIZED_DATA_INGESTOR):
+    try:
+        from runtime_external_entities import get_ingester_class
+        DataConnector.DEFAULT_INGESTOR_CLASS = get_ingester_class()
+    except ImportError:
+        """Runtime Default Ingester not found, ignore"""
+        pass

snowflake/ml/data/ingestor_utils.py CHANGED Viewed

@@ -1,19 +1,17 @@
 from typing import List, Optional
 import fsspec
+import pyarrow as pa
 from snowflake import snowpark
-from snowflake.connector import result_batch
+from snowflake.connector import cursor as sf_cursor, result_batch
 from snowflake.ml.data import data_source
 from snowflake.ml.fileset import snowfs
 _TARGET_FILE_SIZE = 32 * 2**20  # The max file size for data loading.
-def get_dataframe_result_batches(
-    session: snowpark.Session, df_info: data_source.DataFrameInfo
-) -> List[result_batch.ResultBatch]:
-    """Retrieve the ResultBatches for a given query"""
+def _get_dataframe_cursor(session: snowpark.Session, df_info: data_source.DataFrameInfo) -> sf_cursor.SnowflakeCursor:
     cursor = session._conn._cursor
     if df_info.query_id:
@@ -29,12 +27,24 @@ def get_dataframe_result_batches(
     if cursor._prefetch_hook is None:
         raise RuntimeError("Loading data from result query failed unexpectedly. Please contact Snowflake support.")
     cursor._prefetch_hook()
+    return cursor
+def get_dataframe_result_batches(
+    session: snowpark.Session, df_info: data_source.DataFrameInfo
+) -> List[result_batch.ResultBatch]:
+    """Retrieve the ResultBatches for a given query"""
+    cursor = _get_dataframe_cursor(session, df_info)
     batches = cursor.get_result_batches()
-    if batches is None:
-        raise ValueError(
-            "Failed to retrieve training data. Query status:" f" {session._conn._conn.get_query_status(query_id)}"
-        )
-    return batches
+    return batches or []
+def get_dataframe_arrow_table(session: snowpark.Session, df_info: data_source.DataFrameInfo) -> pa.Table:
+    """Retrieve the full in-memory result for a given query"""
+    cursor = _get_dataframe_cursor(session, df_info)
+    table = cursor.fetch_arrow_all()  # type: ignore[call-overload]
+    return table
 def get_dataset_filesystem(

snowflake/ml/data/torch_utils.py ADDED Viewed

@@ -0,0 +1,68 @@
+from typing import Any, Dict, Iterator, List, Union
+import numpy as np
+import numpy.typing as npt
+import torch.utils.data
+from snowflake.ml.data import data_ingestor
+class TorchDatasetWrapper(torch.utils.data.IterableDataset[Dict[str, Any]]):
+    """Wrap a DataIngestor into a PyTorch IterableDataset
+    Iterating the dataset pulls batches from the ingestor's to_batches(). When iteration
+    happens inside a DataLoader worker (torch.utils.data.get_worker_info() is not None),
+    batches are split round-robin across workers so that each batch is yielded by exactly
+    one worker.
+    """
+    def __init__(
+        self,
+        ingestor: data_ingestor.DataIngestor,
+        *,
+        batch_size: int,
+        shuffle: bool = False,
+        drop_last: bool = False,
+        squeeze_outputs: bool = True
+    ) -> None:
+        """Not intended for direct usage. Use DataConnector.to_torch_dataset() instead
+        Args:
+            ingestor: Source of the data; its to_batches() method is called on every iteration.
+            batch_size: Forwarded to ingestor.to_batches() as batch_size.
+            shuffle: Forwarded to ingestor.to_batches() as shuffle. Iterating with shuffle enabled
+                inside a DataLoader worker raises RuntimeError.
+            drop_last: Forwarded to ingestor.to_batches() as drop_last_batch.
+            squeeze_outputs: If True, every batch value is passed through squeeze() and object-dtype
+                values are additionally converted to lists. If False, batches are yielded unchanged.
+        """
+        self._ingestor = ingestor
+        self._batch_size = batch_size
+        self._shuffle = shuffle
+        self._drop_last = drop_last
+        self._squeeze_outputs = squeeze_outputs
+    def __iter__(self) -> Iterator[Dict[str, Union[npt.NDArray[Any], List[Any]]]]:
+        """Yield this worker's share of the ingestor's batches
+        Outside of a DataLoader worker every batch is yielded. Inside a worker, batches are
+        assigned round-robin by position: the worker with id `i` yields batches
+        i, i + num_workers, i + 2 * num_workers, ...
+        Yields:
+            One dict per selected batch, keyed like the batch returned by the ingestor.
+        Raises:
+            RuntimeError: If shuffle is enabled while running inside a DataLoader worker.
+        """
+        # Defaults describe the single-process case: only slot 0 exists, so every batch matches.
+        max_idx = 0
+        filter_idx = 0
+        worker_info = torch.utils.data.get_worker_info()
+        if worker_info is not None:
+            # Slots are numbered 0..num_workers-1; this worker only keeps the slot equal to its id.
+            max_idx = worker_info.num_workers - 1
+            filter_idx = worker_info.id
+        if self._shuffle and worker_info is not None:
+            raise RuntimeError("Dataset shuffling not currently supported with multithreading")
+        counter = 0
+        # Every worker iterates the complete to_batches() stream and discards the batches
+        # that fall outside its own slot.
+        for batch in self._ingestor.to_batches(
+            batch_size=self._batch_size, shuffle=self._shuffle, drop_last_batch=self._drop_last
+        ):
+            # Skip indices during multi-process data loading to prevent data duplication
+            if counter == filter_idx:
+                # Basic preprocessing on batch values: squeeze away extra dimensions
+                # and convert object arrays (e.g. strings) to lists
+                # NOTE(review): squeeze() is called without an axis, so (assuming the values are
+                # numpy arrays) a batch holding a single row would presumably lose its batch
+                # dimension as well -- confirm this is intended.
+                if self._squeeze_outputs:
+                    yield {
+                        k: (v.squeeze().tolist() if v.dtype == np.object_ else v.squeeze()) for k, v in batch.items()
+                    }
+                else:
+                    yield batch  # type: ignore[misc]
+            # Advance the round-robin slot, wrapping back to 0 after the last slot.
+            if counter < max_idx:
+                counter += 1
+            else:
+                counter = 0
+class TorchDataPipeWrapper(TorchDatasetWrapper, torch.utils.data.IterDataPipe[Dict[str, Any]]):
+    """Wrap a DataIngestor into a PyTorch IterDataPipe
+    Defines only a constructor and otherwise inherits from TorchDatasetWrapper; the constructor
+    always passes squeeze_outputs=False to the parent.
+    """
+    def __init__(
+        self, ingestor: data_ingestor.DataIngestor, *, batch_size: int, shuffle: bool = False, drop_last: bool = False
+    ) -> None:
+        """Not intended for direct usage. Use DataConnector.to_torch_datapipe() instead
+        Args:
+            ingestor: Source of the data, forwarded to TorchDatasetWrapper.
+            batch_size: Forwarded to TorchDatasetWrapper as batch_size.
+            shuffle: Forwarded to TorchDatasetWrapper as shuffle.
+            drop_last: Forwarded to TorchDatasetWrapper as drop_last.
+        """
+        # squeeze_outputs is fixed to False here; it is not exposed as a parameter of this wrapper.
+        super().__init__(ingestor, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, squeeze_outputs=False)

snowflake/ml/dataset/dataset.py CHANGED Viewed

@@ -472,9 +472,7 @@ lineage_node.DOMAIN_LINEAGE_REGISTRY["dataset"] = Dataset
 def _get_schema_level_identifier(session: snowpark.Session, dataset_name: str) -> Tuple[str, str, str]:
     """Resolve a dataset name into a validated schema-level location identifier"""
-    db, schema, object_name, others = identifier.parse_schema_level_object_identifier(dataset_name)
-    if others:
-        raise ValueError(f"Invalid identifier: unexpected '{others}'")
+    db, schema, object_name = identifier.parse_schema_level_object_identifier(dataset_name)
     db = db or session.get_current_database()
     schema = schema or session.get_current_schema()
     return str(db), str(schema), str(object_name)

snowflake/ml/feature_store/access_manager.py CHANGED Viewed

@@ -30,6 +30,7 @@ class _Privilege:
     object_name: str
     privileges: List[str]
     scope: Optional[str] = None
+    optional: bool = False
 @dataclass(frozen=True)
@@ -72,8 +73,7 @@ _PRE_INIT_PRIVILEGES: Dict[_FeatureStoreRole, List[_Privilege]] = {
         _Privilege("VIEW", _ALL_OBJECTS, ["SELECT", "REFERENCES"], "SCHEMA {database}.{schema}"),
         _Privilege("TABLE", _ALL_OBJECTS, ["SELECT", "REFERENCES"], "SCHEMA {database}.{schema}"),
         _Privilege("DATASET", _ALL_OBJECTS, ["USAGE"], "SCHEMA {database}.{schema}"),
-        # User should decide whether they want to grant warehouse usage to CONSUMER
-        # _Privilege("WAREHOUSE", "{warehouse}", ["USAGE"]),
+        _Privilege("WAREHOUSE", "{warehouse}", ["USAGE"], optional=True),
     ],
     _FeatureStoreRole.NONE: [],
 }
@@ -109,7 +109,7 @@ def _grant_privileges(
                 query += f" TO ROLE {role_name}"
                 session.sql(query).collect()
         except exceptions.SnowparkSQLException as e:
-            if any(
+            if p.optional or any(
                 s in e.message
                 for s in (
                     "Ask your account admin",

snowflake/ml/feature_store/feature_store.py CHANGED Viewed

@@ -122,6 +122,14 @@ _DT_OR_VIEW_QUERY_PATTERN = re.compile(
     flags=re.DOTALL | re.IGNORECASE | re.X,
 )
+_DT_INITIALIZE_PATTERN = re.compile(
+    r"""CREATE\ DYNAMIC\ TABLE\ .*
+        initialize\ =\ '(?P<initialize>.*)'\ .*?
+        AS\ .*
+    """,
+    flags=re.DOTALL | re.IGNORECASE | re.X,
+)
 _LIST_FEATURE_VIEW_SCHEMA = StructType(
     [
         StructField("name", StringType()),
@@ -565,11 +573,15 @@ class FeatureStore:
         tagging_clause_str = ",\n".join(tagging_clause)
         def create_col_desc(col: StructField) -> str:
-            desc = feature_view.feature_descs.get(SqlIdentifier(col.name), None)
+            desc = feature_view.feature_descs.get(SqlIdentifier(col.name), None)  # type: ignore[union-attr]
             desc = "" if desc is None else f"COMMENT '{desc}'"
             return f"{col.name} {desc}"
-        column_descs = ", ".join([f"{create_col_desc(col)}" for col in feature_view.output_schema.fields])
+        column_descs = (
+            ", ".join([f"{create_col_desc(col)}" for col in feature_view.output_schema.fields])
+            if feature_view.feature_descs is not None
+            else ""
+        )
         if refresh_freq is not None:
             schedule_task = refresh_freq != "DOWNSTREAM" and timeparse(refresh_freq) is None
@@ -604,7 +616,7 @@ class FeatureStore:
         logger.info(f"Registered FeatureView {feature_view.name}/{version} successfully.")
         return self.get_feature_view(feature_view.name, str(version))
-    @dispatch_decorator()
+    @overload
     def update_feature_view(
         self,
         name: str,
@@ -613,13 +625,37 @@ class FeatureStore:
         refresh_freq: Optional[str] = None,
         warehouse: Optional[str] = None,
         desc: Optional[str] = None,
+    ) -> FeatureView:
+        ...
+    @overload
+    def update_feature_view(
+        self,
+        name: FeatureView,
+        version: Optional[str] = None,
+        *,
+        refresh_freq: Optional[str] = None,
+        warehouse: Optional[str] = None,
+        desc: Optional[str] = None,
+    ) -> FeatureView:
+        ...
+    @dispatch_decorator()  # type: ignore[misc]
+    def update_feature_view(
+        self,
+        name: Union[FeatureView, str],
+        version: Optional[str] = None,
+        *,
+        refresh_freq: Optional[str] = None,
+        warehouse: Optional[str] = None,
+        desc: Optional[str] = None,
     ) -> FeatureView:
         """Update a registered feature view.
             Check feature_view.py for which fields are allowed to be updated after registration.
         Args:
-            name: name of the FeatureView to be updated.
-            version: version of the FeatureView to be updated.
+            name: FeatureView object or name to update.
+            version: Optional version of feature view. Must set when argument name is a str.
             refresh_freq: updated refresh frequency.
             warehouse: updated warehouse.
             desc: description of feature view.
@@ -661,7 +697,7 @@ class FeatureStore:
             SnowflakeMLException: [RuntimeError] If FeatureView is not managed and refresh_freq is defined.
             SnowflakeMLException: [RuntimeError] Failed to update feature view.
         """
-        feature_view = self.get_feature_view(name=name, version=version)
+        feature_view = self._validate_feature_view_name_and_version_input(name, version)
         new_desc = desc if desc is not None else feature_view.desc
         if feature_view.status == FeatureViewStatus.STATIC:
@@ -696,7 +732,7 @@ class FeatureStore:
                     f"Update feature view {feature_view.name}/{feature_view.version} failed: {e}"
                 ),
             ) from e
-        return self.get_feature_view(name=name, version=version)
+        return self.get_feature_view(name=feature_view.name, version=str(feature_view.version))
     @overload
     def read_feature_view(self, feature_view: str, version: str) -> DataFrame:
@@ -1795,6 +1831,7 @@ class FeatureStore:
                 )
                 WAREHOUSE = {warehouse}
                 REFRESH_MODE = {feature_view.refresh_mode}
+                INITIALIZE = {feature_view.initialize}
                 AS {feature_view.query}
             """
             self._session.sql(query).collect(block=block, statement_params=self._telemetry_stmp)
@@ -2121,7 +2158,7 @@ class FeatureStore:
         if "." not in name:
             return f"{self._config.full_schema_path}.{name}"
-        db_name, schema_name, object_name, _ = identifier.parse_schema_level_object_identifier(name)
+        db_name, schema_name, object_name = identifier.parse_schema_level_object_identifier(name)
         return "{}.{}.{}".format(
             db_name or self._config.database,
             schema_name or self._config.schema,
@@ -2186,11 +2223,7 @@ class FeatureStore:
         if len(fv_maps.keys()) == 0:
             return self._session.create_dataframe([], schema=_LIST_FEATURE_VIEW_SCHEMA)
-        filters = (
-            [lambda d: d["entityName"].startswith(feature_view_name.resolved())]  # type: ignore[union-attr]
-            if feature_view_name
-            else None
-        )
+        filters = [lambda d: d["entityName"].startswith(feature_view_name.resolved())] if feature_view_name else None
         res = self._lookup_tagged_objects(self._get_entity_name(entity_name), filters)
         output_values: List[List[Any]] = []
@@ -2273,6 +2306,8 @@ class FeatureStore:
             entities = [find_and_compose_entity(n) for n in fv_metadata.entities]
             ts_col = fv_metadata.timestamp_col
             timestamp_col = ts_col if ts_col not in _LEGACY_TIMESTAMP_COL_PLACEHOLDER_VALS else None
+            re_initialize = re.match(_DT_INITIALIZE_PATTERN, row["text"])
+            initialize = re_initialize.group("initialize") if re_initialize is not None else "ON_CREATE"
             fv = FeatureView._construct_feature_view(
                 name=name,
@@ -2281,18 +2316,23 @@ class FeatureStore:
                 timestamp_col=timestamp_col,
                 desc=desc,
                 version=version,
-                status=FeatureViewStatus(row["scheduling_state"])
-                if len(row["scheduling_state"]) > 0
-                else FeatureViewStatus.MASKED,
+                status=(
+                    FeatureViewStatus(row["scheduling_state"])
+                    if len(row["scheduling_state"]) > 0
+                    else FeatureViewStatus.MASKED
+                ),
                 feature_descs=self._fetch_column_descs("DYNAMIC TABLE", fv_name),
                 refresh_freq=row["target_lag"],
                 database=self._config.database.identifier(),
                 schema=self._config.schema.identifier(),
-                warehouse=SqlIdentifier(row["warehouse"], case_sensitive=True).identifier()
-                if len(row["warehouse"]) > 0
-                else None,
+                warehouse=(
+                    SqlIdentifier(row["warehouse"], case_sensitive=True).identifier()
+                    if len(row["warehouse"]) > 0
+                    else None
+                ),
                 refresh_mode=row["refresh_mode"],
                 refresh_mode_reason=row["refresh_mode_reason"],
+                initialize=initialize,
                 owner=row["owner"],
                 infer_schema_df=infer_schema_df,
                 session=self._session,
@@ -2319,6 +2359,7 @@ class FeatureStore:
                 warehouse=None,
                 refresh_mode=None,
                 refresh_mode_reason=None,
+                initialize="ON_CREATE",
                 owner=row["owner"],
                 infer_schema_df=infer_schema_df,
                 session=self._session,

snowflake-ml-python 1.6.1__py3-none-any.whl → 1.6.3__py3-none-any.whl

snowflake-ml-python 1.6.1py3-none-any.whl → 1.6.3py3-none-any.whl