wedata-feature-engineering 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. {feature_store → wedata}/__init__.py +1 -1
  2. {feature_store → wedata/feature_store}/client.py +113 -41
  3. {feature_store → wedata/feature_store}/constants/constants.py +19 -0
  4. {feature_store → wedata/feature_store}/entities/column_info.py +4 -4
  5. {feature_store → wedata/feature_store}/entities/feature_lookup.py +5 -1
  6. {feature_store → wedata/feature_store}/entities/feature_spec.py +46 -46
  7. wedata/feature_store/entities/feature_table.py +107 -0
  8. {feature_store → wedata/feature_store}/entities/training_set.py +13 -12
  9. {feature_store → wedata/feature_store}/feature_table_client/feature_table_client.py +85 -30
  10. {feature_store → wedata/feature_store}/spark_client/spark_client.py +30 -56
  11. wedata/feature_store/training_set_client/training_set_client.py +367 -0
  12. wedata/feature_store/utils/__init__.py +0 -0
  13. feature_store/utils/utils.py → wedata/feature_store/utils/common_utils.py +108 -54
  14. {feature_store → wedata/feature_store}/utils/feature_lookup_utils.py +6 -6
  15. {feature_store → wedata/feature_store}/utils/feature_spec_utils.py +6 -6
  16. {feature_store → wedata/feature_store}/utils/feature_utils.py +5 -5
  17. wedata/feature_store/utils/on_demand_utils.py +107 -0
  18. {feature_store → wedata/feature_store}/utils/schema_utils.py +1 -1
  19. wedata/feature_store/utils/signature_utils.py +205 -0
  20. {feature_store → wedata/feature_store}/utils/training_set_utils.py +18 -19
  21. {feature_store → wedata/feature_store}/utils/uc_utils.py +1 -1
  22. {wedata_feature_engineering-0.1.4.dist-info → wedata_feature_engineering-0.1.6.dist-info}/METADATA +1 -1
  23. wedata_feature_engineering-0.1.6.dist-info/RECORD +43 -0
  24. wedata_feature_engineering-0.1.6.dist-info/top_level.txt +1 -0
  25. feature_store/entities/feature_table.py +0 -164
  26. feature_store/training_set_client/training_set_client.py +0 -196
  27. feature_store/utils/common_utils.py +0 -96
  28. wedata_feature_engineering-0.1.4.dist-info/RECORD +0 -41
  29. wedata_feature_engineering-0.1.4.dist-info/top_level.txt +0 -1
  30. {feature_store/constants → wedata/feature_store}/__init__.py +0 -0
  31. {feature_store/entities → wedata/feature_store/constants}/__init__.py +0 -0
  32. {feature_store/feature_table_client → wedata/feature_store/entities}/__init__.py +0 -0
  33. {feature_store → wedata/feature_store}/entities/data_type.py +0 -0
  34. {feature_store → wedata/feature_store}/entities/environment_variables.py +0 -0
  35. {feature_store → wedata/feature_store}/entities/feature.py +0 -0
  36. {feature_store → wedata/feature_store}/entities/feature_column_info.py +0 -0
  37. {feature_store → wedata/feature_store}/entities/feature_function.py +0 -0
  38. {feature_store → wedata/feature_store}/entities/feature_spec_constants.py +0 -0
  39. {feature_store → wedata/feature_store}/entities/feature_table_info.py +0 -0
  40. {feature_store → wedata/feature_store}/entities/function_info.py +0 -0
  41. {feature_store → wedata/feature_store}/entities/on_demand_column_info.py +0 -0
  42. {feature_store → wedata/feature_store}/entities/source_data_column_info.py +0 -0
  43. {feature_store/spark_client → wedata/feature_store/feature_table_client}/__init__.py +0 -0
  44. {feature_store/training_set_client → wedata/feature_store/spark_client}/__init__.py +0 -0
  45. {feature_store/utils → wedata/feature_store/training_set_client}/__init__.py +0 -0
  46. {feature_store → wedata/feature_store}/utils/topological_sort.py +0 -0
  47. {feature_store → wedata/feature_store}/utils/validation_utils.py +0 -0
  48. {wedata_feature_engineering-0.1.4.dist-info → wedata_feature_engineering-0.1.6.dist-info}/WHEEL +0 -0
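Most of this release is a namespace move: modules that previously lived under the top-level feature_store package now ship under wedata.feature_store, and feature_store/utils/utils.py is renamed to wedata/feature_store/utils/common_utils.py. A minimal sketch of the corresponding import migration for downstream code, using only module paths that appear in the file list above (the surrounding user code is assumed):

    # 0.1.4 layout (removed in 0.1.6)
    # from feature_store.entities.feature_lookup import FeatureLookup
    # from feature_store.utils import utils

    # 0.1.6 layout
    from wedata.feature_store.entities.feature_lookup import FeatureLookup
    from wedata.feature_store.utils import common_utils  # formerly feature_store/utils/utils.py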
@@ -0,0 +1,107 @@
+ import copy
+ from typing import Dict, List
+
+ from pyspark.sql import DataFrame
+ from pyspark.sql.functions import expr
+
+ from wedata.feature_store.entities.feature_function import FeatureFunction
+ from wedata.feature_store.entities.function_info import FunctionInfo
+ from wedata.feature_store.entities.on_demand_column_info import OnDemandColumnInfo
+ from wedata.feature_store.utils import common_utils, uc_utils
+
+
+ def _udf_expr(udf_name: str, arguments: List[str]) -> expr:
+     """
+     Generate a Spark SQL expression, e.g. expr("udf_name(col1, col2)")
+     """
+     arguments_str = ", ".join(common_utils.sanitize_identifiers(arguments))
+     return expr(f"{udf_name}({arguments_str})")
+
+
+ def _validate_apply_functions_df(
+     df: DataFrame,
+     functions_to_apply: List[OnDemandColumnInfo],
+     uc_function_infos: Dict[str, FunctionInfo],
+ ):
+     """
+     Validate the following:
+     1. On-demand input columns specified by functions_to_apply exist in the DataFrame.
+     2. On-demand input columns have data types that match those of UDF parameters.
+     """
+     for odci in functions_to_apply:
+         function_info = uc_function_infos[odci.udf_name]
+         types_dict = dict(df.dtypes)
+
+         for p in function_info.input_params:
+             arg_column = odci.input_bindings[p.name]
+             if arg_column not in df.columns:
+                 raise ValueError(
+                     f"FeatureFunction argument column '{arg_column}' for UDF '{odci.udf_name}' parameter '{p.name}' "
+                     f"does not exist in provided DataFrame with schema '{df.schema}'."
+                 )
+             if types_dict[arg_column] != p.type_text:
+                 raise ValueError(
+                     f"FeatureFunction argument column '{arg_column}' for UDF '{odci.udf_name}' parameter '{p.name}' "
+                     f"does not have the expected type. Argument column '{arg_column}' has type "
+                     f"'{types_dict[arg_column]}' and parameter '{p.name}' has type '{p.type_text}'."
+                 )
+
+
+ def apply_functions_if_not_overridden(
+     df: DataFrame,
+     functions_to_apply: List[OnDemandColumnInfo],
+     uc_function_infos: Dict[str, FunctionInfo],
+ ) -> DataFrame:
+     """
+     For all on-demand features, in the order defined by the FeatureSpec:
+     If the feature does not already exist, append the evaluated UDF expression.
+     Existing column values or column positions are not modified.
+
+     `_validate_apply_functions_df` validates UDFs can be applied on `df` schema.
+
+     The caller should validate:
+     1. FeatureFunction bound argument columns for UDF parameters exist in FeatureSpec defined features.
+     2. FeatureFunction output feature names are unique.
+     """
+     _validate_apply_functions_df(
+         df=df,
+         functions_to_apply=functions_to_apply,
+         uc_function_infos=uc_function_infos,
+     )
+
+     columns = {}
+     for odci in functions_to_apply:
+         if odci.output_name not in df.columns:
+             function_info = uc_function_infos[odci.udf_name]
+             # Resolve the bound arguments in the UDF parameter order
+             udf_arguments = [
+                 odci.input_bindings[p.name] for p in function_info.input_params
+             ]
+             columns[odci.output_name] = _udf_expr(odci.udf_name, udf_arguments)
+     return df.withColumns(columns)
+
+
+ def get_feature_functions_with_full_udf_names(
+     feature_functions: List[FeatureFunction], current_catalog: str, current_schema: str
+ ):
+     """
+     Takes in a list of FeatureFunctions, and returns copies with:
+     1. Fully qualified UDF names.
+     2. If output_name is empty, fully qualified UDF names as output_name.
+     """
+     udf_names = {ff.udf_name for ff in feature_functions}
+     uc_utils._check_qualified_udf_names(udf_names)
+     uc_utils._verify_all_udfs_in_uc(udf_names, current_catalog, current_schema)
+
+     standardized_feature_functions = []
+     for ff in feature_functions:
+         ff_copy = copy.deepcopy(ff)
+         del ff
+
+         ff_copy._udf_name = uc_utils.get_full_udf_name(
+             ff_copy.udf_name, current_catalog, current_schema
+         )
+         if not ff_copy.output_name:
+             ff_copy._output_name = ff_copy.udf_name
+         standardized_feature_functions.append(ff_copy)
+     return standardized_feature_functions
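The hunk above is the new file wedata/feature_store/utils/on_demand_utils.py (item 17 in the file list). It renders each on-demand FeatureFunction as a Spark SQL expr(...) call and appends the result only when the output column is not already present. A standalone sketch of that mechanism in plain PySpark, independent of this package; the UDF name main.demo.total and the columns are hypothetical:

    from pyspark.sql import SparkSession
    from pyspark.sql.functions import expr

    spark = SparkSession.builder.getOrCreate()
    df = spark.createDataFrame([(100.0, 3)], ["price", "quantity"])

    # Assumes a SQL UDF main.demo.total(price, quantity) is already registered.
    udf_call = expr("main.demo.total(`price`, `quantity`)")

    # Mirror apply_functions_if_not_overridden: existing columns are left untouched.
    if "total" not in df.columns:
        df = df.withColumns({"total": udf_call})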
@@ -1,6 +1,6 @@
  import logging

- from feature_store.constants.constants import _ERROR, _WARN
+ from wedata.feature_store.constants.constants import _ERROR, _WARN

  _logger = logging.getLogger(__name__)

@@ -0,0 +1,205 @@
+ import logging
+ from typing import Any, Dict, Optional
+
+ import mlflow
+ from mlflow.models import ModelSignature
+ from mlflow.types import ColSpec
+ from mlflow.types import DataType as MlflowDataType
+ from mlflow.types import ParamSchema, Schema
+
+ from wedata.feature_store.entities.feature_column_info import FeatureColumnInfo
+ from wedata.feature_store.entities.feature_spec import FeatureSpec
+ from wedata.feature_store.entities.on_demand_column_info import OnDemandColumnInfo
+ from wedata.feature_store.entities.source_data_column_info import SourceDataColumnInfo
+
+ _logger = logging.getLogger(__name__)
+
+ # Some types (array, map, decimal, timestamp_ntz) are unsupported due to MLflow signatures
+ # lacking any equivalent types. We thus cannot construct a ColSpec for any column
+ # that uses these types.
+ SUPPORTED_TYPE_MAP = {
+     "smallint": MlflowDataType.integer,  # Upcast to integer
+     "int": MlflowDataType.integer,
+     "bigint": MlflowDataType.long,
+     "float": MlflowDataType.float,
+     "double": MlflowDataType.double,
+     "boolean": MlflowDataType.boolean,
+     "date": MlflowDataType.datetime,
+     "timestamp": MlflowDataType.datetime,
+     "string": MlflowDataType.string,
+     "binary": MlflowDataType.binary,
+ }
+
+
+ def is_unsupported_type(type_str: str):
+     return type_str not in SUPPORTED_TYPE_MAP
+
+
+ def convert_spark_data_type_to_mlflow_signature_type(spark_type):
+     """
+     Maps Databricks SQL types to MLflow signature types.
+     docs.databricks.com/sql/language-manual/sql-ref-datatypes.html#language-mappings
+     """
+     return SUPPORTED_TYPE_MAP.get(spark_type)
+
+
+ def get_input_schema_from_feature_spec(feature_spec: FeatureSpec) -> Schema:
+     """
+     Produces an MLflow signature schema from a feature spec.
+     Source data columns are marked as required inputs and feature columns
+     (both lookups and on-demand features) are marked as optional inputs.
+
+     :param feature_spec: FeatureSpec object with datatypes for each column.
+     """
+     # If we're missing any data types for any column, we are likely dealing with a
+     # malformed feature spec and should halt signature construction.
+     if any([ci.data_type is None for ci in feature_spec.column_infos]):
+         raise Exception("Training set does not contain column data types.")
+
+     source_data_cols = [
+         ci
+         for ci in feature_spec.column_infos
+         if isinstance(ci.info, SourceDataColumnInfo)
+     ]
+     # Don't create signature if any source data columns (required) are of complex types.
+     if any(
+         [
+             ci.data_type is None or is_unsupported_type(ci.data_type)
+             for ci in source_data_cols
+         ]
+     ):
+         raise Exception(
+             "Input DataFrame contains column data types not supported by "
+             "MLflow model signatures."
+         )
+     required_input_colspecs = [
+         ColSpec(
+             convert_spark_data_type_to_mlflow_signature_type(ci.data_type),
+             ci.info.output_name,
+             required=True,
+         )
+         for ci in source_data_cols
+     ]
+     feature_cols = [
+         ci
+         for ci in feature_spec.column_infos
+         if isinstance(ci.info, (FeatureColumnInfo, OnDemandColumnInfo))
+     ]
+     unsupported_feature_cols = [
+         ci for ci in feature_cols if is_unsupported_type(ci.data_type)
+     ]
+     optional_input_colspecs = [
+         ColSpec(
+             convert_spark_data_type_to_mlflow_signature_type(ci.data_type),
+             ci.output_name,
+             required=False,
+         )
+         for ci in feature_cols
+         if not is_unsupported_type(ci.data_type)
+     ]
+     if unsupported_feature_cols:
+         feat_string = ", ".join(
+             [f"{ci.output_name} ({ci.data_type})" for ci in unsupported_feature_cols]
+         )
+         _logger.warning(
+             f"The following features will not be included in the input schema because their"
+             f" data types are not supported by MLflow model signatures: {feat_string}. "
+             f"These features cannot be overridden during model serving."
+         )
+     return Schema(required_input_colspecs + optional_input_colspecs)
+
+
+ def get_output_schema_from_labels(label_type_map: Optional[Dict[str, str]]) -> Schema:
+     """
+     Produces an MLflow signature schema from the provided label type map.
+     :param label_type_map: Map label column name -> data type
+     """
+     if not label_type_map:
+         raise Exception("Training set does not contain a label.")
+     if any([is_unsupported_type(dtype) for dtype in label_type_map.values()]):
+         raise Exception(
+             "Labels are of data types not supported by MLflow model signatures."
+         )
+     else:
+         output_colspecs = [
+             ColSpec(
+                 convert_spark_data_type_to_mlflow_signature_type(spark_type),
+                 col_name,
+                 required=True,
+             )
+             for col_name, spark_type in label_type_map.items()
+         ]
+         return Schema(output_colspecs)
+
+
+ def get_mlflow_signature_from_feature_spec(
+     feature_spec: FeatureSpec,
+     label_type_map: Optional[Dict[str, str]],
+     override_output_schema: Optional[Schema],
+     params: Optional[Dict[str, Any]] = None,
+ ) -> Optional[ModelSignature]:
+     """
+     Produce an MLflow signature from a feature spec and label type map.
+     Source data columns are marked as required inputs and feature columns
+     (both lookups and on-demand features) are marked as optional inputs.
+
+     Reads output types from the cached label -> datatype map in the training set.
+     If override_output_schema is provided, it will always be used as the output schema.
+
+     :param feature_spec: FeatureSpec object with datatypes for each column.
+     :param label_type_map: Map of label column name -> datatype
+     :param override_output_schema: User-provided output schema to use if provided.
+     """
+     kwargs = {}
+     kwargs["inputs"] = get_input_schema_from_feature_spec(feature_spec)
+     try:
+         output_schema = override_output_schema or get_output_schema_from_labels(
+             label_type_map
+         )
+         kwargs["outputs"] = output_schema
+     except Exception as e:
+         _logger.warning(f"Could not infer an output schema: {e}")
+
+     if params:
+         try:
+             from mlflow.types.utils import _infer_param_schema
+
+             kwargs["params"] = _infer_param_schema(params)
+         except Exception as e:
+             _logger.warning(f"Could not infer params schema: {e}")
+
+     return mlflow.models.ModelSignature(**kwargs)
+
+
+ def drop_signature_inputs_and_invalid_params(signature):
+     """
+     Drop ModelSignature inputs field and invalid params from params field.
+     This is useful for feature store model's raw_model.
+     Feature store model's input schema does not apply to raw_model's input,
+     so we drop the inputs field of raw_model's signature.
+     Feature store model's result_type param enables setting and overriding
+     a default result_type for predictions, but this interferes with params
+     passed to MLflow's predict function, so we drop result_type from
+     the params field of raw_model's signature.
+
+     :param signature: ModelSignature object.
+     """
+     if signature:
+         outputs_schema = signature.outputs
+         params_schema = signature.params if hasattr(signature, "params") else None
+         try:
+             # Only for mlflow>=2.6.0 ModelSignature contains params attribute
+             if params_schema:
+                 updated_params_schema = ParamSchema(
+                     [param for param in params_schema if param.name != "result_type"]
+                 )
+                 return ModelSignature(
+                     outputs=outputs_schema, params=updated_params_schema
+                 )
+             if outputs_schema:
+                 return ModelSignature(outputs=outputs_schema)
+         except TypeError:
+             _logger.warning(
+                 "ModelSignature without inputs is not supported, please upgrade "
+                 "mlflow >= 2.7.0 to use the feature."
+             )
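The hunk above is the new file wedata/feature_store/utils/signature_utils.py (item 19 in the file list). It builds an MLflow ModelSignature in which source data columns are required inputs and looked-up or on-demand feature columns are optional, so callers can override features at serving time. A minimal hand-built signature of the same shape, assuming MLflow 2.7+ for the required= flag on ColSpec; the column names are illustrative only:

    from mlflow.models import ModelSignature
    from mlflow.types import ColSpec, DataType, Schema

    inputs = Schema([
        ColSpec(DataType.long, "user_id", required=True),       # source data column
        ColSpec(DataType.double, "avg_spend", required=False),   # feature column, may be overridden
    ])
    outputs = Schema([ColSpec(DataType.double, "label", required=True)])
    signature = ModelSignature(inputs=inputs, outputs=outputs)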
@@ -4,21 +4,19 @@ from typing import Dict, List, Optional, Set

  from pyspark.sql import DataFrame

- from feature_store.entities.column_info import ColumnInfo
- from feature_store.entities.feature import Feature
- from feature_store.entities.feature_column_info import FeatureColumnInfo
- from feature_store.entities.feature_lookup import FeatureLookup
- from feature_store.entities.feature_spec import FeatureSpec
- from feature_store.entities.feature_table import FeatureTable
- from feature_store.entities.feature_table_info import FeatureTableInfo
- from feature_store.entities.function_info import FunctionInfo
- from feature_store.entities.on_demand_column_info import OnDemandColumnInfo
- from feature_store.entities.source_data_column_info import SourceDataColumnInfo
-
- from feature_store.constants.constants import _ERROR, _WARN
-
- from feature_store.utils import common_utils, validation_utils, uc_utils, schema_utils, utils
- from feature_store.utils.feature_spec_utils import assign_topological_ordering
+ from wedata.feature_store.entities.column_info import ColumnInfo
+ from wedata.feature_store.entities.feature import Feature
+ from wedata.feature_store.entities.feature_column_info import FeatureColumnInfo
+ from wedata.feature_store.entities.feature_lookup import FeatureLookup
+ from wedata.feature_store.entities.feature_spec import FeatureSpec
+ from wedata.feature_store.entities.feature_table import FeatureTable
+ from wedata.feature_store.entities.feature_table_info import FeatureTableInfo
+ from wedata.feature_store.entities.function_info import FunctionInfo
+ from wedata.feature_store.entities.on_demand_column_info import OnDemandColumnInfo
+ from wedata.feature_store.entities.source_data_column_info import SourceDataColumnInfo
+
+ from wedata.feature_store.utils import common_utils, validation_utils, uc_utils, schema_utils
+ from wedata.feature_store.utils.feature_spec_utils import assign_topological_ordering

  _logger = logging.getLogger(__name__)

@@ -99,9 +97,9 @@ def _explode_feature_lookup(
          FeatureColumnInfo(
              table_name=feature_lookup.table_name,
              feature_name=feature_name,
-             lookup_key=utils.as_list(feature_lookup.lookup_key),
+             lookup_key=common_utils.as_list(feature_lookup.lookup_key),
              output_name=(feature_lookup._get_output_name(feature_name)),
-             timestamp_lookup_key=utils.as_list(
+             timestamp_lookup_key=common_utils.as_list(
                  feature_lookup.timestamp_lookup_key, default=[]
              ),
          )
@@ -280,13 +278,14 @@ def get_table_metadata(
      feature_table_metadata_map = get_feature_table_metadata_for_tables(
          spark_client, table_names=table_names
      )
+
      feature_table_data_map = load_feature_data_for_tables(
          spark_client, table_names=table_names
      )
      return _FeatureTableMetadata(
          feature_table_features_map,
          feature_table_metadata_map,
-         feature_table_data_map,
+         feature_table_data_map
      )


@@ -515,7 +514,7 @@ def build_feature_spec(
          for table_name in consumed_table_names
      ]
      function_infos = [
-         FunctionInfo(udf_name=udf_name) for udf_name in consumed_udf_names
+         FunctionInfo(full_name=udf_name) for udf_name in consumed_udf_names
      ]

      # Build FeatureSpec
@@ -2,7 +2,7 @@ import copy
  import re
  from typing import Optional, Set

- from feature_store.entities.feature_spec import FeatureSpec
+ from wedata.feature_store.entities.feature_spec import FeatureSpec

  SINGLE_LEVEL_NAMESPACE_REGEX = r"^[^\. \/\x00-\x1F\x7F]+$"
  TWO_LEVEL_NAMESPACE_REGEX = r"^[^\. \/\x00-\x1F\x7F]+(\.[^\. \/\x00-\x1F\x7F]+)$"
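Apart from the import path, the namespace regexes in the hunk above are unchanged. Their behaviour can be checked directly; a small illustrative check with hypothetical table names:

    import re

    SINGLE_LEVEL_NAMESPACE_REGEX = r"^[^\. \/\x00-\x1F\x7F]+$"
    TWO_LEVEL_NAMESPACE_REGEX = r"^[^\. \/\x00-\x1F\x7F]+(\.[^\. \/\x00-\x1F\x7F]+)$"

    assert re.match(SINGLE_LEVEL_NAMESPACE_REGEX, "my_table")                # table
    assert re.match(TWO_LEVEL_NAMESPACE_REGEX, "my_schema.my_table")         # schema.table
    assert not re.match(TWO_LEVEL_NAMESPACE_REGEX, "catalog.schema.table")   # three levels rejected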
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: wedata-feature-engineering
- Version: 0.1.4
+ Version: 0.1.6
  Summary: Wedata Feature Engineering Library
  Home-page:
  Author: meahqian
@@ -0,0 +1,43 @@
+ wedata/__init__.py,sha256=26GwucASB9KsmU109sN-VKotEKp1WZYQDGP0wgWZrzY,101
+ wedata/feature_store/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ wedata/feature_store/client.py,sha256=7a-9C8HIBHnQNQD6I4W3UtBQwkJE8G-Q7N24zydjpkY,8100
+ wedata/feature_store/constants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ wedata/feature_store/constants/constants.py,sha256=b4tgcSt66YIq0Fg7pMbqvbqPOI77Cz8znLVZ4ihUKss,1479
+ wedata/feature_store/entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ wedata/feature_store/entities/column_info.py,sha256=sU_WD9U0qse0M2speowLY30qSE6j4_57PuvtdPkwiFY,4192
+ wedata/feature_store/entities/data_type.py,sha256=VpHS6Fr3TphQQ8NbAcEnDJ-8eOZV6ivYuWxv3pAM2RM,3394
+ wedata/feature_store/entities/environment_variables.py,sha256=ZEFml5H9MQuzBKM074mUrFYu-Sga4Knmxqiwpke2WGc,1679
+ wedata/feature_store/entities/feature.py,sha256=wX8fTBlJq3GYdj9rrBDCY3kFgcVBBAiOOZdxEhnQkNQ,1241
+ wedata/feature_store/entities/feature_column_info.py,sha256=-TGxRafYUaNKe0YzHus2XbfRaVrMv7pcffMdbtTT4nA,2031
+ wedata/feature_store/entities/feature_function.py,sha256=R17INrCE-U_Uj9KLbFz69aYlOkTETTwQHMMo470F4lQ,1865
+ wedata/feature_store/entities/feature_lookup.py,sha256=YjYz8kLq42doFbgPzpmm1r3GPhPYkLsIss4H71x-KAo,8009
+ wedata/feature_store/entities/feature_spec.py,sha256=60RUOOe9y_Xsd1I3xqq4NZYnaox4_jjwSyGRTKXLiIw,20041
+ wedata/feature_store/entities/feature_spec_constants.py,sha256=YWDBfRiNDe6fUJFUBo3V4WYg2xsljoPAE-ZejfFZCgM,785
+ wedata/feature_store/entities/feature_table.py,sha256=dHZHSDPD4HJ2XanLVIrVTkaCYUeqZ6eWEpA0d3YO71g,4010
+ wedata/feature_store/entities/feature_table_info.py,sha256=2vUaVdW_jw1dRAlmJWvBRueuMeuqWu_NYB9SlxLI7Uw,1126
+ wedata/feature_store/entities/function_info.py,sha256=l0kmiq2R_QNfSMJ7y0xZohlMiemgYSr1dN5vzV8ijIs,7314
+ wedata/feature_store/entities/on_demand_column_info.py,sha256=Eh5ieaj1TxC7DG6ipBZzH2ZyY0bwkLrDOkuZjgYr4gY,1297
+ wedata/feature_store/entities/source_data_column_info.py,sha256=a9jQOJvehwDIrKPwsP6W9YRBSPNK2nZYypE6-p80CwA,542
+ wedata/feature_store/entities/training_set.py,sha256=ylt1h6Z_xU8hKYvnvd80CeewTGSN68-_kvFpoliwH7s,5679
+ wedata/feature_store/feature_table_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ wedata/feature_store/feature_table_client/feature_table_client.py,sha256=nrnY3FLQnMhW1BzByDjjfU89hirgaKlg2l2tAfcjvyM,12138
+ wedata/feature_store/spark_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ wedata/feature_store/spark_client/spark_client.py,sha256=DBCYjLsFrIVRvLErTNyfLIHRul3v0y9uZIY2JR1N92s,10323
+ wedata/feature_store/training_set_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ wedata/feature_store/training_set_client/training_set_client.py,sha256=gHeZU0rvvUcyNTfroXD3LAinFPdhDpnwTOIWj6z84Tc,15102
+ wedata/feature_store/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ wedata/feature_store/utils/common_utils.py,sha256=rmGXfBoEyDMlfqd7lxpEmKJaLoQ-d-ufWpAcE8nSHqA,10009
+ wedata/feature_store/utils/feature_lookup_utils.py,sha256=da6ULwf5D-FRVpZoNyag1rroBfq_XPSH4a3uEMB_8io,22372
+ wedata/feature_store/utils/feature_spec_utils.py,sha256=GFwc-WT6nN1tnal5F2c0bgVDRhH-YW58S0GaHBPZEsQ,11624
+ wedata/feature_store/utils/feature_utils.py,sha256=KKq28bVB_lCuhnR9Hk6JegJBOVgcelWlvrRM-F9onkA,2796
+ wedata/feature_store/utils/on_demand_utils.py,sha256=pazZRG5c0Se08MV_inBddIeX4Q9xlVN_H9SC_WK3xzs,4260
+ wedata/feature_store/utils/schema_utils.py,sha256=y6EYY1pUxjVg6MP4C7avdW8ZEBBaDo1YTV2CmPF4i8o,4491
+ wedata/feature_store/utils/signature_utils.py,sha256=_4_mo1Qlzklp-JrISMS3Jv89MPbaH6rz_cRDvJqFNXM,7957
+ wedata/feature_store/utils/topological_sort.py,sha256=ebzKxmxeCLk9seB1zR0ASCGXsZsa-DjxJeTc4KUadtg,6475
+ wedata/feature_store/utils/training_set_utils.py,sha256=MYsPZS1d9HKswHgjgxD8K7H9N3dWPyyTTx20Mkp4PVU,22497
+ wedata/feature_store/utils/uc_utils.py,sha256=A-W8Cd8yvTmAMEWaHeWmGmcIDMvUtjAfx2G2x_di1QE,10774
+ wedata/feature_store/utils/validation_utils.py,sha256=FslvrNs3kstqvM6THScLOluEE6O9RWlDrD9xiihTzlw,1735
+ wedata_feature_engineering-0.1.6.dist-info/METADATA,sha256=orxNq_A9F8FcSWYn6wTY1pQ2KtqNVIREvGziUnNa1ys,493
+ wedata_feature_engineering-0.1.6.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+ wedata_feature_engineering-0.1.6.dist-info/top_level.txt,sha256=Xa0v1rh__RvfVTVDirW5r5UBKg7ZO_iuTeXfp8MNo2A,7
+ wedata_feature_engineering-0.1.6.dist-info/RECORD,,
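For reference, each RECORD entry above has the form path,sha256=<digest>,<size-in-bytes>, where the digest is an unpadded URL-safe base64 encoding of the file's SHA-256 hash, as specified for wheels; the RECORD file itself carries no hash. A digest can be recomputed for a local file like this (the path is illustrative):

    import base64
    import hashlib

    with open("wedata/__init__.py", "rb") as f:
        digest = hashlib.sha256(f.read()).digest()
    # Should match the sha256=... token of the corresponding RECORD line
    print("sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode())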
@@ -1,164 +0,0 @@
- from typing import Dict
-
-
-
- class FeatureTable:
-     """
-     .. note::
-
-        Aliases:`!databricks.feature_engineering.entities.feature_table.FeatureTable`, `!databricks.feature_store.entities.feature_table.FeatureTable`
-
-     Value class describing one feature table.
-
-     This will typically not be instantiated directly, instead the
-     :meth:`create_table() <databricks.feature_engineering.client.FeatureEngineeringClient.create_table>`
-     will create :class:`.FeatureTable` objects.
-     """
-
-     def __init__(
-         self,
-         name,
-         table_id,
-         description,
-         primary_keys,
-         partition_columns,
-         features,
-         creation_timestamp=None,
-         online_stores=None,
-         notebook_producers=None,
-         job_producers=None,
-         table_data_sources=None,
-         path_data_sources=None,
-         custom_data_sources=None,
-         timestamp_keys=None,
-         tags=None,
-     ):
-         """Initialize a FeatureTable object."""
-         self.name = name
-         self.table_id = table_id
-         self.description = description
-         self.primary_keys = primary_keys
-         self.partition_columns = partition_columns
-         self.features = features
-         self.creation_timestamp = creation_timestamp
-         self.online_stores = online_stores if online_stores is not None else []
-         self.notebook_producers = (
-             notebook_producers if notebook_producers is not None else []
-         )
-         self.job_producers = job_producers if job_producers is not None else []
-         self.table_data_sources = (
-             table_data_sources if table_data_sources is not None else []
-         )
-         self.path_data_sources = (
-             path_data_sources if path_data_sources is not None else []
-         )
-         self.custom_data_sources = (
-             custom_data_sources if custom_data_sources is not None else []
-         )
-         self.timestamp_keys = timestamp_keys if timestamp_keys is not None else []
-         self._tags = tags
-
-     # @property
-     # @deprecated("FeatureTable.primary_keys", since="v0.3.6")
-     # def keys(self):
-     #     return self.primary_keys
-
-     @property
-     def tags(self) -> Dict[str, str]:
-         """
-         Get the tags associated with the feature table.
-
-         :return a Dictionary of all tags associated with the feature table as key/value pairs
-         """
-         if self._tags is None:
-             # If no tags are set, self._tags is expected an empty dictionary.
-             raise ValueError(
-                 "Internal error: tags have not been fetched for this FeatureTable instance"
-             )
-         return self._tags
-
-
-     @classmethod
-     def from_uc_get_table_response(cls, uc_get_table_response: Dict[str, object]):
-         """Return a FeatureStore object from a UC get_table response. Note: UC does not return online_stores or tags.
-
-         :param dict uc_get_table_response: A dictionary representing a UC get_table response.
-         :return FeatureTable: a FeatureStore object from the UC response.
-         """
-         table_name = uc_get_table_response["full_name"]
-
-         if uc_get_table_response["securable_kind"] == "TABLE_ONLINE_VIEW":
-             source_table = uc_get_table_response["properties_pairs"]["properties"][
-                 "source_table"
-             ]
-             raise ValueError(
-                 f"Table '{table_name}' is an online view. Online Views are not feature tables. Please use the source table '{source_table}' instead."
-             )
-
-         if (
-             "table_type" in uc_get_table_response
-             and uc_get_table_response["table_type"] == "VIEW"
-         ):
-             return cls(
-                 name=table_name,
-                 table_id=uc_get_table_response["table_id"],
-                 description=uc_get_table_response["comment"]
-                 if "comment" in uc_get_table_response
-                 else "",
-                 primary_keys=[],
-                 partition_columns=[],
-                 features=[],
-                 creation_timestamp=uc_get_table_response["created_at"],
-                 timestamp_keys=[],
-             )
-
-         table_constraints = (
-             uc_get_table_response["table_constraints"]
-             if "table_constraints" in uc_get_table_response
-             else []
-         )
-         primary_key_constraints = [
-             c for c in table_constraints if "primary_key_constraint" in c
-         ]
-         if len(primary_key_constraints) == 0:
-             raise ValueError(
-                 "Table can't be used as a feature table because it has no primary key constraint defined."
-                 + " Use 'ALTER TABLE table_name ADD CONSTRAINT table_name_pk PRIMARY KEY( key_column [,...] )'"
-                 + " to add a primary key constraint on the table."
-             )
-         primary_key_constraint = primary_key_constraint = primary_key_constraints[0][
-             "primary_key_constraint"
-         ]
-         timestamp_keys = (
-             primary_key_constraint["timeseries_columns"]
-             if "timeseries_columns" in primary_key_constraint
-             else []
-         )
-         primary_keys = [
-             c
-             for c in primary_key_constraint["child_columns"]
-             if c not in timestamp_keys
-         ]
-
-         columns = uc_get_table_response["columns"]
-         features = [c["name"] for c in columns]
-         partition_columns_unordered = [c for c in columns if "partition_index" in c]
-         partition_columns = [
-             c["name"]
-             for c in sorted(
-                 partition_columns_unordered, key=lambda x: x["partition_index"]
-             )
-         ]
-
-         return cls(
-             name=table_name,
-             table_id=uc_get_table_response["table_id"],
-             description=uc_get_table_response["comment"]
-             if "comment" in uc_get_table_response
-             else "",
-             primary_keys=primary_keys,
-             partition_columns=partition_columns,
-             features=features,
-             creation_timestamp=uc_get_table_response["created_at"],
-             timestamp_keys=timestamp_keys,
-         )