snowflake-ml-python 1.5.0__py3-none-any.whl → 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. snowflake/ml/_internal/env_utils.py +6 -0
  2. snowflake/ml/_internal/lineage/lineage_utils.py +95 -0
  3. snowflake/ml/_internal/telemetry.py +1 -0
  4. snowflake/ml/_internal/utils/identifier.py +1 -1
  5. snowflake/ml/_internal/utils/sql_identifier.py +14 -1
  6. snowflake/ml/dataset/__init__.py +2 -1
  7. snowflake/ml/dataset/dataset.py +4 -3
  8. snowflake/ml/dataset/dataset_reader.py +5 -8
  9. snowflake/ml/feature_store/__init__.py +6 -0
  10. snowflake/ml/feature_store/access_manager.py +279 -0
  11. snowflake/ml/feature_store/feature_store.py +159 -99
  12. snowflake/ml/feature_store/feature_view.py +18 -8
  13. snowflake/ml/fileset/embedded_stage_fs.py +15 -12
  14. snowflake/ml/fileset/snowfs.py +3 -2
  15. snowflake/ml/fileset/stage_fs.py +25 -7
  16. snowflake/ml/model/_client/model/model_impl.py +46 -39
  17. snowflake/ml/model/_client/model/model_version_impl.py +24 -2
  18. snowflake/ml/model/_client/ops/metadata_ops.py +27 -4
  19. snowflake/ml/model/_client/ops/model_ops.py +131 -16
  20. snowflake/ml/model/_client/sql/_base.py +34 -0
  21. snowflake/ml/model/_client/sql/model.py +32 -39
  22. snowflake/ml/model/_client/sql/model_version.py +60 -43
  23. snowflake/ml/model/_client/sql/stage.py +6 -32
  24. snowflake/ml/model/_client/sql/tag.py +32 -56
  25. snowflake/ml/model/_model_composer/model_composer.py +2 -2
  26. snowflake/ml/model/_packager/model_handlers/mlflow.py +2 -1
  27. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +50 -21
  28. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +81 -3
  29. snowflake/ml/modeling/framework/base.py +4 -3
  30. snowflake/ml/modeling/pipeline/pipeline.py +27 -7
  31. snowflake/ml/registry/_manager/model_manager.py +36 -7
  32. snowflake/ml/version.py +1 -1
  33. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.1.dist-info}/METADATA +54 -10
  34. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.1.dist-info}/RECORD +37 -35
  35. snowflake/ml/_internal/lineage/dataset_dataframe.py +0 -44
  36. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.1.dist-info}/LICENSE.txt +0 -0
  37. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.1.dist-info}/WHEEL +0 -0
  38. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.1.dist-info}/top_level.txt +0 -0
snowflake/ml/feature_store/feature_store.py

@@ -8,7 +8,19 @@ import re
 import warnings
 from dataclasses import dataclass
 from enum import Enum
-from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar, Union, cast
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    List,
+    Literal,
+    Optional,
+    Tuple,
+    TypeVar,
+    Union,
+    cast,
+    overload,
+)
 
 import packaging.version as pkg_version
 import snowflake.ml.version as snowml_version
@@ -32,7 +44,7 @@ from snowflake.ml.feature_store.entity import _ENTITY_NAME_LENGTH_LIMIT, Entity
 from snowflake.ml.feature_store.feature_view import (
     _FEATURE_OBJ_TYPE,
     _FEATURE_VIEW_NAME_DELIMITER,
-    _TIMESTAMP_COL_PLACEHOLDER,
+    _LEGACY_TIMESTAMP_COL_PLACEHOLDER_VALS,
     FeatureView,
     FeatureViewSlice,
     FeatureViewStatus,
@@ -242,23 +254,16 @@ class FeatureStore:
 
         else:
             try:
-                self._session.sql(f"CREATE SCHEMA IF NOT EXISTS {self._config.full_schema_path}").collect(
-                    statement_params=self._telemetry_stmp
-                )
-                for tag in to_sql_identifiers(
-                    [
-                        _FEATURE_VIEW_METADATA_TAG,
-                    ]
-                ):
+                # Explicitly check if schema exists first since we may not have CREATE SCHEMA privilege
+                if len(self._find_object("SCHEMAS", self._config.schema)) == 0:
+                    self._session.sql(f"CREATE SCHEMA IF NOT EXISTS {self._config.full_schema_path}").collect(
+                        statement_params=self._telemetry_stmp
+                    )
+                for tag in to_sql_identifiers([_FEATURE_VIEW_METADATA_TAG, _FEATURE_STORE_OBJECT_TAG]):
                     self._session.sql(f"CREATE TAG IF NOT EXISTS {self._get_fully_qualified_name(tag)}").collect(
                         statement_params=self._telemetry_stmp
                     )
-
-                self._session.sql(
-                    f"CREATE TAG IF NOT EXISTS {self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}"
-                ).collect(statement_params=self._telemetry_stmp)
             except Exception as e:
-                self.clear()
                 raise snowml_exceptions.SnowflakeMLException(
                     error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
                     original_exception=RuntimeError(f"Failed to create feature store {name}: {e}."),
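The new inline comment explains the reordering: the CREATE SCHEMA call can fail for roles lacking the CREATE SCHEMA privilege, so the code now probes for the schema first and only attempts creation when it is missing. A minimal sketch of the same probe-before-create pattern, assuming an open Snowpark session and the illustrative names MY_DB / MY_FS_SCHEMA:

    # Only run CREATE SCHEMA when the schema is actually missing, so a role
    # without the CREATE SCHEMA privilege can still connect to an existing
    # feature store. (Illustrative helper, not code from the package.)
    rows = session.sql("SHOW SCHEMAS LIKE 'MY_FS_SCHEMA' IN DATABASE MY_DB").collect()
    if not rows:
        session.sql("CREATE SCHEMA MY_DB.MY_FS_SCHEMA").collect()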
@@ -750,7 +755,7 @@ class FeatureStore:
         except Exception as e:
             raise snowml_exceptions.SnowflakeMLException(
                 error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
-                original_exception=RuntimeError(f"Failed to alter schema or drop tag: {e}."),
+                original_exception=RuntimeError(f"Failed to delete entity: {e}."),
             ) from e
         logger.info(f"Deleted Entity {name}.")
 
@@ -802,7 +807,7 @@ class FeatureStore:
 
         return df
 
-    @dispatch_decorator()
+    @overload
     def generate_dataset(
         self,
         name: str,
@@ -814,7 +819,40 @@
         exclude_columns: Optional[List[str]] = None,
         include_feature_view_timestamp_col: bool = False,
         desc: str = "",
+        output_type: Literal["dataset"] = "dataset",
     ) -> dataset.Dataset:
+        ...
+
+    @overload
+    def generate_dataset(
+        self,
+        name: str,
+        spine_df: DataFrame,
+        features: List[Union[FeatureView, FeatureViewSlice]],
+        output_type: Literal["table"],
+        version: Optional[str] = None,
+        spine_timestamp_col: Optional[str] = None,
+        spine_label_cols: Optional[List[str]] = None,
+        exclude_columns: Optional[List[str]] = None,
+        include_feature_view_timestamp_col: bool = False,
+        desc: str = "",
+    ) -> DataFrame:
+        ...
+
+    @dispatch_decorator()  # type: ignore[misc]
+    def generate_dataset(
+        self,
+        name: str,
+        spine_df: DataFrame,
+        features: List[Union[FeatureView, FeatureViewSlice]],
+        version: Optional[str] = None,
+        spine_timestamp_col: Optional[str] = None,
+        spine_label_cols: Optional[List[str]] = None,
+        exclude_columns: Optional[List[str]] = None,
+        include_feature_view_timestamp_col: bool = False,
+        desc: str = "",
+        output_type: Literal["dataset", "table"] = "dataset",
+    ) -> Union[dataset.Dataset, DataFrame]:
         """
         Generate dataset by given source table and feature views.
 
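The three generate_dataset definitions above follow the standard typing.overload pattern: each @overload pins output_type to one Literal so type checkers can narrow the return type, while the single decorated implementation accepts both. A self-contained sketch of the pattern with illustrative names (not from the package):

    from typing import Literal, Union, overload

    @overload
    def generate(output_type: Literal["dataset"] = "dataset") -> list: ...

    @overload
    def generate(output_type: Literal["table"]) -> str: ...

    def generate(output_type: Literal["dataset", "table"] = "dataset") -> Union[list, str]:
        # Single runtime implementation; a checker types generate("table")
        # as str and generate() as list, based on the overloads above.
        return [] if output_type == "dataset" else "TABLE"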
@@ -834,30 +872,29 @@
             include_feature_view_timestamp_col: Generated dataset will include timestamp column of feature view
                 (if feature view has timestamp column) if set true. Default to false.
             desc: A description about this dataset.
+            output_type: The type of Snowflake storage to use for the generated training data.
 
         Returns:
-            A Dataset object.
+            If output_type is "dataset" (default), returns a Dataset object.
+            If output_type is "table", returns a Snowpark DataFrame representing the table.
 
         Raises:
-            SnowflakeMLException: [ValueError] spine_df contains more than one query.
             SnowflakeMLException: [ValueError] Dataset name/version already exists
             SnowflakeMLException: [ValueError] Snapshot creation failed.
+            SnowflakeMLException: [ValueError] Invalid output_type specified.
             SnowflakeMLException: [RuntimeError] Failed to create clone from table.
             SnowflakeMLException: [RuntimeError] Failed to find resources.
         """
+        if output_type not in {"table", "dataset"}:
+            raise snowml_exceptions.SnowflakeMLException(
+                error_code=error_codes.INVALID_ARGUMENT,
+                original_exception=ValueError(f"Invalid output_type: {output_type}."),
+            )
         if spine_timestamp_col is not None:
             spine_timestamp_col = SqlIdentifier(spine_timestamp_col)
         if spine_label_cols is not None:
             spine_label_cols = to_sql_identifiers(spine_label_cols)  # type: ignore[assignment]
 
-        if len(spine_df.queries["queries"]) != 1:
-            raise snowml_exceptions.SnowflakeMLException(
-                error_code=error_codes.INVALID_ARGUMENT,
-                original_exception=ValueError(
-                    f"spine_df must contain only one query. Got: {spine_df.queries['queries']}"
-                ),
-            )
-
         result_df, join_keys = self._join_features(
             spine_df, features, spine_timestamp_col, include_feature_view_timestamp_col
         )
@@ -875,33 +912,49 @@
             result_df = self._exclude_columns(result_df, exclude_columns)
 
         fs_meta = FeatureStoreMetadata(
-            spine_query=spine_df.queries["queries"][0],
+            spine_query=spine_df.queries["queries"][-1],
            serialized_feature_views=[fv.to_json() for fv in features],
            spine_timestamp_col=spine_timestamp_col,
        )
 
         try:
-            ds: dataset.Dataset = dataset.create_from_dataframe(
-                self._session,
-                name,
-                version,
-                input_dataframe=result_df,
-                exclude_cols=[spine_timestamp_col],
-                label_cols=spine_label_cols,
-                properties=fs_meta,
-                comment=desc,
-            )
-            return ds
+            if output_type == "table":
+                table_name = f"{name}_{version}"
+                result_df.write.mode("errorifexists").save_as_table(table_name)  # type: ignore[call-overload]
+                ds_df = self._session.table(table_name)
+                return ds_df
+            else:
+                assert output_type == "dataset"
+                if not self._is_dataset_enabled():
+                    raise snowml_exceptions.SnowflakeMLException(
+                        error_code=error_codes.SNOWML_CREATE_FAILED,
+                        original_exception=RuntimeError(
+                            "Dataset is not enabled in your account. Ask your account admin to set"
+                            ' FEATURE_DATASET=ENABLED or set output_type="table" to generate the data'
+                            " as a Snowflake Table instead."
+                        ),
+                    )
+                ds: dataset.Dataset = dataset.create_from_dataframe(
+                    self._session,
+                    name,
+                    version,
+                    input_dataframe=result_df,
+                    exclude_cols=[spine_timestamp_col],
+                    label_cols=spine_label_cols,
+                    properties=fs_meta,
+                    comment=desc,
+                )
+                return ds
 
         except dataset_errors.DatasetExistError as e:
             raise snowml_exceptions.SnowflakeMLException(
                 error_code=error_codes.OBJECT_ALREADY_EXISTS,
-                original_exception=ValueError(str(e)),
+                original_exception=RuntimeError(str(e)),
             ) from e
         except SnowparkSQLException as e:
             raise snowml_exceptions.SnowflakeMLException(
                 error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
-                original_exception=RuntimeError(f"An error occurred during Dataset generation: {e}."),
+                original_exception=RuntimeError(f"An error occurred during dataset generation: {e}."),
             ) from e
 
     @dispatch_decorator()
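Together with the overloads, this makes table output an escape hatch for accounts where the Dataset feature is not enabled. A usage sketch, assuming a FeatureStore instance fs, a spine DataFrame spine_df, and registered feature views fv1 and fv2 (all illustrative names):

    # Default path: materialize a Snowflake Dataset (requires FEATURE_DATASET=ENABLED).
    ds = fs.generate_dataset(
        name="MY_TRAINING_DATA",
        spine_df=spine_df,
        features=[fv1, fv2],
        version="V1",
    )

    # Fallback path: write a table named f"{name}_{version}" ("MY_TRAINING_TABLE_V1"
    # here) and get a Snowpark DataFrame over it back.
    df = fs.generate_dataset(
        name="MY_TRAINING_TABLE",
        spine_df=spine_df,
        features=[fv1, fv2],
        version="V1",
        output_type="table",
    )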
@@ -930,52 +983,47 @@
         return self._load_serialized_feature_objects(source_meta.properties.serialized_feature_views)
 
     @dispatch_decorator()
-    def clear(self) -> None:
+    def _clear(self, dryrun: bool = True) -> None:
         """
-        Clear all feature store internal objects including feature views, entities etc. Note feature store
-        instance (snowflake schema) won't be deleted. Use snowflake to delete feature store instance.
+        Clear all feature views and entities. Note Feature Store schema and metadata will NOT be purged
+        together. Use SQL to delete schema and metadata instead.
 
-        Raises:
-            SnowflakeMLException: [RuntimeError] Failed to clear feature store.
+        Args:
+            dryrun: Print a list of objects will be deleted but not actually perform the deletion when true.
         """
-        try:
-            result = self._session.sql(
-                f"""
-                SELECT *
-                FROM {self._config.database}.INFORMATION_SCHEMA.SCHEMATA
-                WHERE SCHEMA_NAME = '{self._config.schema.resolved()}'
-                """
-            ).collect()
-            if len(result) == 0:
-                return
-
-            fs_obj_tag = self._find_object("TAGS", SqlIdentifier(_FEATURE_STORE_OBJECT_TAG))
-            if len(fs_obj_tag) == 0:
-                return
-
-            object_types = ["DYNAMIC TABLES", "DATASETS", "VIEWS", "TASKS"]
-            for obj_type in object_types:
-                all_object_rows = self._find_object(obj_type, None)
-                for row in all_object_rows:
-                    obj_name = self._get_fully_qualified_name(SqlIdentifier(row["name"], case_sensitive=True))
-                    self._session.sql(f"DROP {obj_type[:-1]} {obj_name}").collect()
-                    logger.info(f"Deleted {obj_type[:-1]}: {obj_name}.")
-
-            entity_tags = self._find_object("TAGS", SqlIdentifier(_ENTITY_TAG_PREFIX), prefix_match=True)
-            all_tags = [
-                _FEATURE_STORE_OBJECT_TAG,
-                _FEATURE_VIEW_METADATA_TAG,
-            ] + [SqlIdentifier(row["name"], case_sensitive=True) for row in entity_tags]
-            for tag_name in all_tags:
-                obj_name = self._get_fully_qualified_name(tag_name)
-                self._session.sql(f"DROP TAG IF EXISTS {obj_name}").collect()
-                logger.info(f"Deleted TAG: {obj_name}.")
+        warnings.warn(
+            "It will clear ALL feature views and entities in this Feature Store. Make sure your role"
+            " has sufficient access to all feature views and entities. Insufficient access to some feature"
+            " views or entities will leave Feature Store in an incomplete state.",
+            stacklevel=2,
+            category=UserWarning,
+        )
+
+        all_fvs_df = self.list_feature_views()
+        all_entities_df = self.list_entities()
+        all_fvs_rows = all_fvs_df.collect()
+        all_entities_rows = all_entities_df.collect()
+
+        if dryrun:
+            logger.info(
+                "Following feature views and entities will be deleted."
+                + " Set 'dryrun=False' to perform the actual deletion."
+            )
+            logger.info(f"Total {len(all_fvs_rows)} Feature views to be deleted:")
+            all_fvs_df.show(n=len(all_fvs_rows))
+            logger.info(f"\nTotal {len(all_entities_rows)} entities to be deleted:")
+            all_entities_df.show(n=len(all_entities_rows))
+            return
+
+        for fv_row in all_fvs_rows:
+            fv = self.get_feature_view(
+                SqlIdentifier(fv_row["NAME"], case_sensitive=True).identifier(), fv_row["VERSION"]
+            )
+            self.delete_feature_view(fv)
+
+        for entity_row in all_entities_rows:
+            self.delete_entity(SqlIdentifier(entity_row["NAME"], case_sensitive=True).identifier())
 
-        except Exception as e:
-            raise snowml_exceptions.SnowflakeMLException(
-                error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
-                original_exception=RuntimeError(f"Failed to clear feature store {self._config.full_schema_path}: {e}."),
-            ) from e
         logger.info(f"Feature store {self._config.full_schema_path} has been cleared.")
 
     def _get_feature_view_if_exists(self, name: str, version: str) -> FeatureView:
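Note the semantic shift: the public, immediately destructive clear() is gone, replaced by a private _clear() that defaults to a dry run and deletes through the public delete_feature_view / delete_entity paths rather than dropping objects with raw SQL. Illustrative call sequence on a FeatureStore instance fs:

    fs._clear()              # dry run: only logs the feature views and entities that would be deleted
    fs._clear(dryrun=False)  # performs the deletions via delete_feature_view / delete_entity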
@@ -1093,14 +1141,6 @@
         spine_timestamp_col: Optional[SqlIdentifier],
         include_feature_view_timestamp_col: bool,
     ) -> Tuple[DataFrame, List[SqlIdentifier]]:
-        if len(spine_df.queries["queries"]) != 1:
-            raise snowml_exceptions.SnowflakeMLException(
-                error_code=error_codes.INVALID_ARGUMENT,
-                original_exception=ValueError(
-                    f"spine_df must contain only one query. Got: {spine_df.queries['queries']}"
-                ),
-            )
-
         for f in features:
             f = f.feature_view_ref if isinstance(f, FeatureViewSlice) else f
             if f.status == FeatureViewStatus.DRAFT:
@@ -1122,7 +1162,7 @@
         self._asof_join_enabled = self._is_asof_join_enabled()
 
         # TODO: leverage Snowpark dataframe for more concise syntax once it supports AsOfJoin
-        query = spine_df.queries["queries"][0]
+        query = spine_df.queries["queries"][-1]
         layer = 0
         for f in features:
             if isinstance(f, FeatureViewSlice):
@@ -1180,7 +1220,15 @@
             """
             layer += 1
 
-        return self._session.sql(query), join_keys
+        # TODO: construct result dataframe with dataframe APIs once ASOF join is supported natively.
+        # Below code manually constructs the result dataframe from private members of the spine dataframe,
+        # which likely will cause unintentional issues. This step is needed because spine_df might contain
+        # prerequisite queries and post actions that must be carried over to the result dataframe.
+        result_df = self._session.sql(query)
+        result_df._plan.queries = spine_df._plan.queries[:-1] + result_df._plan.queries
+        result_df._plan.post_actions = spine_df._plan.post_actions
+
+        return result_df, join_keys
 
     def _check_database_exists_or_throw(self) -> None:
         resolved_db_name = self._config.database.resolved()
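Context for the [0] to [-1] changes above: a Snowpark DataFrame can be backed by several SQL statements (temp-table setup plus a final SELECT), exposed via df.queries. The old code rejected such multi-query spines outright; the new code treats the last query as the defining SELECT and splices the spine's earlier setup queries and post actions into the result plan. A small inspection sketch, assuming an open Snowpark session and any DataFrame spine_df:

    # df.queries returns a dict with two keys; the final entry of "queries"
    # is the SELECT that defines the DataFrame, and earlier entries are
    # prerequisite statements.
    plan = spine_df.queries
    print(plan["queries"][-1])   # the defining SELECT
    print(plan["post_actions"])  # cleanup statements, e.g. DROP TABLE for temp tables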
@@ -1517,6 +1565,9 @@
             original_exception=RuntimeError(f"Failed to parse query text for FeatureView {name}/{version}: {row}."),
         )
 
+        fv_name = FeatureView._get_physical_name(name, version)
+        infer_schema_df = self._session.sql(f"SELECT * FROM {self._get_fully_qualified_name(fv_name)}")
+
         if m.group("obj_type") == "DYNAMIC TABLE":
             query = m.group("query")
             df = self._session.sql(query)
@@ -1524,7 +1575,7 @@
             fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
             entities = [find_and_compose_entity(n) for n in fv_metadata.entities]
             ts_col = fv_metadata.timestamp_col
-            timestamp_col = ts_col if ts_col != _TIMESTAMP_COL_PLACEHOLDER else None
+            timestamp_col = ts_col if ts_col not in _LEGACY_TIMESTAMP_COL_PLACEHOLDER_VALS else None
 
             fv = FeatureView._construct_feature_view(
                 name=name,
@@ -1534,9 +1585,7 @@
                 desc=desc,
                 version=version,
                 status=FeatureViewStatus(row["scheduling_state"]),
-                feature_descs=self._fetch_column_descs(
-                    "DYNAMIC TABLE", SqlIdentifier(row["name"], case_sensitive=True)
-                ),
+                feature_descs=self._fetch_column_descs("DYNAMIC TABLE", fv_name),
                 refresh_freq=row["target_lag"],
                 database=self._config.database.identifier(),
                 schema=self._config.schema.identifier(),
@@ -1544,6 +1593,7 @@
                 refresh_mode=row["refresh_mode"],
                 refresh_mode_reason=row["refresh_mode_reason"],
                 owner=row["owner"],
+                infer_schema_df=infer_schema_df,
             )
             return fv
         else:
@@ -1553,7 +1603,7 @@
             fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
             entities = [find_and_compose_entity(n) for n in fv_metadata.entities]
             ts_col = fv_metadata.timestamp_col
-            timestamp_col = ts_col if ts_col != _TIMESTAMP_COL_PLACEHOLDER else None
+            timestamp_col = ts_col if ts_col not in _LEGACY_TIMESTAMP_COL_PLACEHOLDER_VALS else None
 
             fv = FeatureView._construct_feature_view(
                 name=name,
@@ -1563,7 +1613,7 @@
                 desc=desc,
                 version=version,
                 status=FeatureViewStatus.STATIC,
-                feature_descs=self._fetch_column_descs("VIEW", SqlIdentifier(row["name"], case_sensitive=True)),
+                feature_descs=self._fetch_column_descs("VIEW", fv_name),
                 refresh_freq=None,
                 database=self._config.database.identifier(),
                 schema=self._config.schema.identifier(),
@@ -1571,6 +1621,7 @@
                 refresh_mode=None,
                 refresh_mode_reason=None,
                 owner=row["owner"],
+                infer_schema_df=infer_schema_df,
             )
             return fv
 
@@ -1720,6 +1771,15 @@
         except Exception:
             return False
 
+    def _is_dataset_enabled(self) -> bool:
+        try:
+            self._session.sql(f"SHOW DATASETS IN SCHEMA {self._config.full_schema_path}").collect()
+            return True
+        except SnowparkSQLException as e:
+            if "'DATASETS' does not exist" in e.message:
+                return False
+            raise
+
     def _check_feature_store_object_versions(self) -> None:
         versions = self._collapse_object_versions()
         if len(versions) > 0 and pkg_version.parse(snowml_version.VERSION) < versions[0]:
snowflake/ml/feature_store/feature_view.py

@@ -5,7 +5,7 @@ import re
 from collections import OrderedDict
 from dataclasses import asdict, dataclass
 from enum import Enum
-from typing import Dict, List, Optional
+from typing import Any, Dict, List, Optional
 
 from snowflake.ml._internal.exceptions import (
     error_codes,
@@ -27,7 +27,8 @@ from snowflake.snowpark.types import (
 )
 
 _FEATURE_VIEW_NAME_DELIMITER = "$"
-_TIMESTAMP_COL_PLACEHOLDER = "FS_TIMESTAMP_COL_PLACEHOLDER_VAL"
+_LEGACY_TIMESTAMP_COL_PLACEHOLDER_VALS = ["FS_TIMESTAMP_COL_PLACEHOLDER_VAL", "NULL"]
+_TIMESTAMP_COL_PLACEHOLDER = "NULL"
 _FEATURE_OBJ_TYPE = "FEATURE_OBJ_TYPE"
 # Feature view version rule is aligned with dataset version rule in SQL.
 _FEATURE_VIEW_VERSION_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_.\-]*$")
@@ -121,12 +122,13 @@ class FeatureView:
         timestamp_col: Optional[str] = None,
         refresh_freq: Optional[str] = None,
         desc: str = "",
+        **_kwargs: Any,
     ) -> None:
         """
         Create a FeatureView instance.
 
         Args:
-            name: name of the FeatureView. NOTE: FeatureView name will be capitalized.
+            name: name of the FeatureView. NOTE: following Snowflake identifier rule
             entities: entities that the FeatureView is associated with.
             feature_df: Snowpark DataFrame containing data source and all feature feature_df logics.
                 Final projection of the DataFrame should contain feature names, join keys and timestamp(if applicable).
@@ -140,6 +142,7 @@ class FeatureView:
                 NOTE: If refresh_freq is not provided, then FeatureView will be registered as View on Snowflake backend
                     and there won't be extra storage cost.
             desc: description of the FeatureView.
+            _kwargs: reserved kwargs for system generated args. NOTE: DO NOT USE.
         """
 
         self._name: SqlIdentifier = SqlIdentifier(name)
@@ -149,6 +152,7 @@
             SqlIdentifier(timestamp_col) if timestamp_col is not None else None
         )
         self._desc: str = desc
+        self._infer_schema_df: DataFrame = _kwargs.get("_infer_schema_df", self._feature_df)
         self._query: str = self._get_query()
         self._version: Optional[FeatureViewVersion] = None
         self._status: FeatureViewStatus = FeatureViewStatus.DRAFT
@@ -295,7 +299,7 @@
 
     @property
     def output_schema(self) -> StructType:
-        return self._feature_df.schema
+        return self._infer_schema_df.schema
 
     @property
     def refresh_mode(self) -> Optional[str]:
@@ -329,7 +333,7 @@
                 f"FeatureView name `{self._name}` contains invalid character `{_FEATURE_VIEW_NAME_DELIMITER}`."
             )
 
-        unescaped_df_cols = to_sql_identifiers(self._feature_df.columns)
+        unescaped_df_cols = to_sql_identifiers(self._infer_schema_df.columns)
         for e in self._entities:
             for k in e.join_keys:
                 if k not in unescaped_df_cols:
@@ -341,17 +345,17 @@
             ts_col = self._timestamp_col
             if ts_col == SqlIdentifier(_TIMESTAMP_COL_PLACEHOLDER):
                 raise ValueError(f"Invalid timestamp_col name, cannot be {_TIMESTAMP_COL_PLACEHOLDER}.")
-            if ts_col not in to_sql_identifiers(self._feature_df.columns):
+            if ts_col not in to_sql_identifiers(self._infer_schema_df.columns):
                 raise ValueError(f"timestamp_col {ts_col} is not found in input dataframe.")
 
-            col_type = self._feature_df.schema[ts_col].datatype
+            col_type = self._infer_schema_df.schema[ts_col].datatype
             if not isinstance(col_type, (DateType, TimeType, TimestampType, _NumericType)):
                 raise ValueError(f"Invalid data type for timestamp_col {ts_col}: {col_type}.")
 
     def _get_feature_names(self) -> List[SqlIdentifier]:
         join_keys = [k for e in self._entities for k in e.join_keys]
         ts_col = [self._timestamp_col] if self._timestamp_col is not None else []
-        feature_names = to_sql_identifiers(self._feature_df.columns, case_sensitive=True)
+        feature_names = to_sql_identifiers(self._infer_schema_df.columns, case_sensitive=False)
         return [c for c in feature_names if c not in join_keys + ts_col]
 
     def __repr__(self) -> str:
@@ -384,6 +388,9 @@
         fv_dict = self.__dict__.copy()
         if "_feature_df" in fv_dict:
             fv_dict.pop("_feature_df")
+        if "_infer_schema_df" in fv_dict:
+            infer_schema_df = fv_dict.pop("_infer_schema_df")
+            fv_dict["_infer_schema_query"] = infer_schema_df.queries["queries"][0]
         fv_dict["_entities"] = [e._to_dict() for e in self._entities]
         fv_dict["_status"] = str(self._status)
         fv_dict["_name"] = str(self._name) if self._name is not None else None
@@ -440,6 +447,7 @@
             refresh_mode=json_dict["_refresh_mode"],
             refresh_mode_reason=json_dict["_refresh_mode_reason"],
             owner=json_dict["_owner"],
+            infer_schema_df=session.sql(json_dict.get("_infer_schema_query", None)),
         )
 
     @staticmethod
@@ -471,6 +479,7 @@
         refresh_mode: Optional[str],
         refresh_mode_reason: Optional[str],
         owner: Optional[str],
+        infer_schema_df: Optional[DataFrame],
     ) -> FeatureView:
         fv = FeatureView(
             name=name,
@@ -478,6 +487,7 @@
             feature_df=feature_df,
             timestamp_col=timestamp_col,
             desc=desc,
+            _infer_schema_df=infer_schema_df,
         )
         fv._version = FeatureViewVersion(version) if version is not None else None
         fv._status = status
snowflake/ml/fileset/embedded_stage_fs.py

@@ -78,22 +78,26 @@ class SFEmbeddedStageFileSystem(stage_fs.SFStageFileSystem):
             match = _SNOWURL_PATH_RE.fullmatch(file)
             assert match is not None and match.group("filepath") is not None
             versions_dict[match.group("version")].append(match.group("filepath"))
-        presigned_urls: List[Tuple[str, str]] = []
         try:
+            async_jobs: List[snowpark.AsyncJob] = []
             for version, version_files in versions_dict.items():
                 for file in version_files:
                     stage_loc = f"{self.stage_name}/versions/{version}"
-                    presigned_urls.extend(
-                        self._session.sql(
-                            f"select '{version}/{file}' as name,"
-                            f" get_presigned_url('{stage_loc}', '{file}', {url_lifetime}) as url"
-                        ).collect(
-                            statement_params=telemetry.get_function_usage_statement_params(
-                                project=stage_fs._PROJECT,
-                                api_calls=[snowpark.DataFrame.collect],
-                            ),
-                        )
+                    query_result = self._session.sql(
+                        f"select '{version}/{file}' as name,"
+                        f" get_presigned_url('{stage_loc}', '{file}', {url_lifetime}) as url"
+                    ).collect(
+                        block=False,
+                        statement_params=telemetry.get_function_usage_statement_params(
+                            project=stage_fs._PROJECT,
+                            api_calls=[snowpark.DataFrame.collect],
+                        ),
                     )
+                    async_jobs.append(query_result)
+            presigned_urls: List[Tuple[str, str]] = [
+                (r["NAME"], r["URL"]) for job in async_jobs for r in stage_fs._resolve_async_job(job)
+            ]
+            return presigned_urls
         except snowpark_exceptions.SnowparkClientException as e:
             if e.message.startswith(fileset_errors.ERRNO_DOMAIN_NOT_EXIST) or e.message.startswith(
                 fileset_errors.ERRNO_STAGE_NOT_EXIST
@@ -109,7 +113,6 @@ class SFEmbeddedStageFileSystem(stage_fs.SFStageFileSystem):
                 error_code=error_codes.INTERNAL_SNOWML_ERROR,
                 original_exception=fileset_errors.FileSetError(str(e)),
             )
-        return presigned_urls
 
     @classmethod
     def _parent(cls, path: str) -> str:
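The rewrite above replaces N sequential GET_PRESIGNED_URL round trips with N non-blocking submissions that are resolved afterwards: collect(block=False) returns a snowpark.AsyncJob handle instead of rows. The general Snowpark pattern, sketched with an illustrative stage and file list and an open session:

    # Submit every query without blocking, then gather results; job.result()
    # blocks until its query finishes and returns the list of Rows.
    jobs = [
        session.sql(
            f"select get_presigned_url('@my_stage', '{name}', 3600) as url"
        ).collect(block=False)
        for name in ["a.csv", "b.csv"]
    ]
    urls = [row["URL"] for job in jobs for row in job.result()]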
snowflake/ml/fileset/snowfs.py

@@ -104,7 +104,8 @@ class SnowFileSystem(sfcfs.SFFileSystem):
         if self._IS_BUGGED_VERSION:
             match = _SNOWURL_PATTERN.fullmatch(abs_path)
             assert match is not None
-            abs_path = abs_path.replace(match.group("relpath"), match.group("relpath").lstrip("/"))
+            if match.group("relpath"):
+                abs_path = abs_path.replace(match.group("relpath"), match.group("relpath").lstrip("/"))
         return abs_path
 
     @classmethod
@@ -145,7 +146,7 @@ class SnowFileSystem(sfcfs.SFFileSystem):
         logging.debug(f"Parsed snow URL: {snowurl_match.groups()}")
         # FIXME(dhung): Temporary fix for bug in GS version 8.17
         if cls._IS_BUGGED_VERSION:
-            filepath = filepath.replace(f"{version}/", f"{version}//")
+            filepath = f"versions/{version}//{relative_path}"
         return _SFFileEntityPath(
             domain=domain, name=name, version=version, relative_path=relative_path, filepath=filepath
         )