wedata-feature-engineering 0.1.3__tar.gz → 0.1.5__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. {wedata-feature-engineering-0.1.3 → wedata-feature-engineering-0.1.5}/PKG-INFO +1 -1
  2. {wedata-feature-engineering-0.1.3 → wedata-feature-engineering-0.1.5}/setup.py +8 -2
  3. {wedata-feature-engineering-0.1.3/feature_store → wedata-feature-engineering-0.1.5/wedata}/__init__.py +1 -1
  4. wedata-feature-engineering-0.1.5/wedata/feature_store/training_set_client/__init__.py +0 -0
  5. wedata-feature-engineering-0.1.5/wedata/feature_store/utils/__init__.py +0 -0
  6. wedata-feature-engineering-0.1.5/wedata/feature_store/utils/common_utils.py +96 -0
  7. wedata-feature-engineering-0.1.5/wedata/feature_store/utils/feature_lookup_utils.py +570 -0
  8. wedata-feature-engineering-0.1.5/wedata/feature_store/utils/feature_spec_utils.py +286 -0
  9. wedata-feature-engineering-0.1.5/wedata/feature_store/utils/feature_utils.py +73 -0
  10. wedata-feature-engineering-0.1.5/wedata/feature_store/utils/schema_utils.py +117 -0
  11. wedata-feature-engineering-0.1.5/wedata/feature_store/utils/topological_sort.py +158 -0
  12. wedata-feature-engineering-0.1.5/wedata/feature_store/utils/training_set_utils.py +580 -0
  13. wedata-feature-engineering-0.1.5/wedata/feature_store/utils/uc_utils.py +281 -0
  14. wedata-feature-engineering-0.1.5/wedata/feature_store/utils/utils.py +252 -0
  15. wedata-feature-engineering-0.1.5/wedata/feature_store/utils/validation_utils.py +55 -0
  16. {wedata-feature-engineering-0.1.3 → wedata-feature-engineering-0.1.5}/wedata_feature_engineering.egg-info/PKG-INFO +1 -1
  17. wedata-feature-engineering-0.1.5/wedata_feature_engineering.egg-info/SOURCES.txt +45 -0
  18. wedata-feature-engineering-0.1.5/wedata_feature_engineering.egg-info/top_level.txt +1 -0
  19. wedata-feature-engineering-0.1.3/wedata_feature_engineering.egg-info/SOURCES.txt +0 -33
  20. wedata-feature-engineering-0.1.3/wedata_feature_engineering.egg-info/top_level.txt +0 -1
  21. {wedata-feature-engineering-0.1.3 → wedata-feature-engineering-0.1.5}/README.md +0 -0
  22. {wedata-feature-engineering-0.1.3 → wedata-feature-engineering-0.1.5}/setup.cfg +0 -0
  23. {wedata-feature-engineering-0.1.3/feature_store/constants → wedata-feature-engineering-0.1.5/wedata/feature_store}/__init__.py +0 -0
  24. {wedata-feature-engineering-0.1.3 → wedata-feature-engineering-0.1.5/wedata}/feature_store/client.py +0 -0
  25. {wedata-feature-engineering-0.1.3/feature_store/entities → wedata-feature-engineering-0.1.5/wedata/feature_store/constants}/__init__.py +0 -0
  26. {wedata-feature-engineering-0.1.3 → wedata-feature-engineering-0.1.5/wedata}/feature_store/constants/constants.py +0 -0
  27. {wedata-feature-engineering-0.1.3/feature_store/feature_table_client → wedata-feature-engineering-0.1.5/wedata/feature_store/entities}/__init__.py +0 -0
  28. {wedata-feature-engineering-0.1.3 → wedata-feature-engineering-0.1.5/wedata}/feature_store/entities/column_info.py +0 -0
  29. {wedata-feature-engineering-0.1.3 → wedata-feature-engineering-0.1.5/wedata}/feature_store/entities/data_type.py +0 -0
  30. {wedata-feature-engineering-0.1.3 → wedata-feature-engineering-0.1.5/wedata}/feature_store/entities/environment_variables.py +0 -0
  31. {wedata-feature-engineering-0.1.3 → wedata-feature-engineering-0.1.5/wedata}/feature_store/entities/feature.py +0 -0
  32. {wedata-feature-engineering-0.1.3 → wedata-feature-engineering-0.1.5/wedata}/feature_store/entities/feature_column_info.py +0 -0
  33. {wedata-feature-engineering-0.1.3 → wedata-feature-engineering-0.1.5/wedata}/feature_store/entities/feature_function.py +0 -0
  34. {wedata-feature-engineering-0.1.3 → wedata-feature-engineering-0.1.5/wedata}/feature_store/entities/feature_lookup.py +0 -0
  35. {wedata-feature-engineering-0.1.3 → wedata-feature-engineering-0.1.5/wedata}/feature_store/entities/feature_spec.py +0 -0
  36. {wedata-feature-engineering-0.1.3 → wedata-feature-engineering-0.1.5/wedata}/feature_store/entities/feature_spec_constants.py +0 -0
  37. {wedata-feature-engineering-0.1.3 → wedata-feature-engineering-0.1.5/wedata}/feature_store/entities/feature_table.py +0 -0
  38. {wedata-feature-engineering-0.1.3 → wedata-feature-engineering-0.1.5/wedata}/feature_store/entities/feature_table_info.py +0 -0
  39. {wedata-feature-engineering-0.1.3 → wedata-feature-engineering-0.1.5/wedata}/feature_store/entities/function_info.py +0 -0
  40. {wedata-feature-engineering-0.1.3 → wedata-feature-engineering-0.1.5/wedata}/feature_store/entities/on_demand_column_info.py +0 -0
  41. {wedata-feature-engineering-0.1.3 → wedata-feature-engineering-0.1.5/wedata}/feature_store/entities/source_data_column_info.py +0 -0
  42. {wedata-feature-engineering-0.1.3 → wedata-feature-engineering-0.1.5/wedata}/feature_store/entities/training_set.py +0 -0
  43. {wedata-feature-engineering-0.1.3/feature_store/spark_client → wedata-feature-engineering-0.1.5/wedata/feature_store/feature_table_client}/__init__.py +0 -0
  44. {wedata-feature-engineering-0.1.3 → wedata-feature-engineering-0.1.5/wedata}/feature_store/feature_table_client/feature_table_client.py +0 -0
  45. {wedata-feature-engineering-0.1.3/feature_store/training_set_client → wedata-feature-engineering-0.1.5/wedata/feature_store/spark_client}/__init__.py +0 -0
  46. {wedata-feature-engineering-0.1.3 → wedata-feature-engineering-0.1.5/wedata}/feature_store/spark_client/spark_client.py +0 -0
  47. {wedata-feature-engineering-0.1.3 → wedata-feature-engineering-0.1.5/wedata}/feature_store/training_set_client/training_set_client.py +0 -0
  48. {wedata-feature-engineering-0.1.3 → wedata-feature-engineering-0.1.5}/wedata_feature_engineering.egg-info/dependency_links.txt +0 -0
  49. {wedata-feature-engineering-0.1.3 → wedata-feature-engineering-0.1.5}/wedata_feature_engineering.egg-info/requires.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wedata-feature-engineering
3
- Version: 0.1.3
3
+ Version: 0.1.5
4
4
  Summary: Wedata Feature Engineering Library
5
5
  Home-page:
6
6
  Author: meahqian
@@ -1,9 +1,15 @@
1
1
  from setuptools import setup, find_packages
2
+ import os
3
+
4
+ # Dynamically read version from wedata/__init__.py
5
+ version = {}
6
+ with open(os.path.join(os.path.dirname(__file__), 'wedata', '__init__.py')) as f:
7
+ exec(f.read(), version)
2
8
 
3
9
  setup(
4
10
  name="wedata-feature-engineering",
5
- version="0.1.3",
6
- packages=find_packages(exclude=["tests*"]),
11
+ version=version["__version__"],
12
+ packages=find_packages(include=['wedata', 'wedata.*']),
7
13
  install_requires=[
8
14
  'pyspark>=3.0.0',
9
15
  'delta-spark>=1.0.0',
@@ -3,4 +3,4 @@ WeData Feature Engineering
3
3
  A toolkit for automated feature engineering
4
4
  """
5
5
 
6
- __version__ = "0.1.0"
6
+ __version__ = "0.1.5"
@@ -0,0 +1,96 @@
1
+ """
2
+ 通用工具函数
3
+ """
4
+
5
+ from collections import Counter
6
+ from typing import Any, List
7
+
8
+ from mlflow.store.artifact.models_artifact_repo import ModelsArtifactRepository
9
+ from mlflow.store.artifact.runs_artifact_repo import RunsArtifactRepository
10
+
11
+
12
+ def is_artifact_uri(uri):
13
+ """
14
+ Checks the artifact URI is associated with a MLflow model or run.
15
+ The actual URI can be a model URI, model URI + subdirectory, or model URI + path to artifact file.
16
+ """
17
+ return ModelsArtifactRepository.is_models_uri(
18
+ uri
19
+ ) or RunsArtifactRepository.is_runs_uri(uri)
20
+
21
+ def as_list(obj, default=None):
22
+ if not obj:
23
+ return default
24
+ elif isinstance(obj, list):
25
+ return obj
26
+ else:
27
+ return [obj]
28
+
29
+ def get_duplicates(elements: List[Any]) -> List[Any]:
30
+ """
31
+ Returns duplicate elements in the order they first appear.
32
+ """
33
+ element_counts = Counter(elements)
34
+ duplicates = []
35
+ for e in element_counts.keys():
36
+ if element_counts[e] > 1:
37
+ duplicates.append(e)
38
+ return duplicates
39
+
40
+ def validate_strings_unique(strings: List[str], error_template: str):
41
+ """
42
+ Validates all strings are unique, otherwise raise ValueError with the error template and duplicates.
43
+ Passes single-quoted, comma delimited duplicates to the error template.
44
+ """
45
+ duplicate_strings = get_duplicates(strings)
46
+ if duplicate_strings:
47
+ duplicates_formatted = ", ".join([f"'{s}'" for s in duplicate_strings])
48
+ raise ValueError(error_template.format(duplicates_formatted))
49
+
50
+ def sanitize_identifier(identifier: str):
51
+ """
52
+ Sanitize and wrap an identifier with backquotes. For example, "a`b" becomes "`a``b`".
53
+ Use this function to sanitize identifiers such as column names in SQL and PySpark.
54
+ """
55
+ return f"`{identifier.replace('`', '``')}`"
56
+
57
+
58
+ def sanitize_identifiers(identifiers: List[str]):
59
+ """
60
+ Sanitize and wrap the identifiers in a list with backquotes.
61
+ """
62
+ return [sanitize_identifier(i) for i in identifiers]
63
+
64
+
65
+ def sanitize_multi_level_name(multi_level_name: str):
66
+ """
67
+ Sanitize a multi-level name (such as an Unity Catalog table name) by sanitizing each segment
68
+ and joining the results. For example, "ca+t.fo`o.ba$r" becomes "`ca+t`.`fo``o`.`ba$r`".
69
+ """
70
+ segments = multi_level_name.split(".")
71
+ return ".".join(sanitize_identifiers(segments))
72
+
73
+
74
+ def unsanitize_identifier(identifier: str):
75
+ """
76
+ Unsanitize an identifier. Useful when we get a possibly sanitized identifier from Spark or
77
+ somewhere else, but we need an unsanitized one.
78
+ Note: This function does not check the correctness of the identifier passed in. e.g. `foo``
79
+ is not a valid sanitized identifier. When given such invalid input, this function returns
80
+ invalid output.
81
+ """
82
+ if len(identifier) >= 2 and identifier[0] == "`" and identifier[-1] == "`":
83
+ return identifier[1:-1].replace("``", "`")
84
+ else:
85
+ return identifier
86
+
87
+
88
+ # strings containing \ or ' can break sql statements, so escape them.
89
+ def escape_sql_string(input_str: str) -> str:
90
+ return input_str.replace("\\", "\\\\").replace("'", "\\'")
91
+
92
+ def get_unique_list_order(elements: List[Any]) -> List[Any]:
93
+ """
94
+ Returns unique elements in the order they first appear.
95
+ """
96
+ return list(dict.fromkeys(elements))