tencent-wedata-feature-engineering-dev 0.1.42__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. tencent_wedata_feature_engineering_dev-0.2.3/PKG-INFO +30 -0
  2. {tencent-wedata-feature-engineering-dev-0.1.42 → tencent_wedata_feature_engineering_dev-0.2.3}/setup.py +5 -2
  3. tencent_wedata_feature_engineering_dev-0.2.3/tencent_wedata_feature_engineering_dev.egg-info/PKG-INFO +30 -0
  4. tencent_wedata_feature_engineering_dev-0.2.3/tencent_wedata_feature_engineering_dev.egg-info/SOURCES.txt +83 -0
  5. {tencent-wedata-feature-engineering-dev-0.1.42 → tencent_wedata_feature_engineering_dev-0.2.3}/tencent_wedata_feature_engineering_dev.egg-info/requires.txt +6 -0
  6. tencent_wedata_feature_engineering_dev-0.2.3/tests/test_common_utils.py +12 -0
  7. tencent_wedata_feature_engineering_dev-0.2.3/tests/test_feature_store.py +388 -0
  8. {tencent-wedata-feature-engineering-dev-0.1.42 → tencent_wedata_feature_engineering_dev-0.2.3}/wedata/__init__.py +1 -1
  9. tencent_wedata_feature_engineering_dev-0.2.3/wedata/common/base_table_client/__init__.py +1 -0
  10. tencent_wedata_feature_engineering_dev-0.2.3/wedata/common/base_table_client/base.py +58 -0
  11. tencent_wedata_feature_engineering_dev-0.2.3/wedata/common/cloud_sdk_client/__init__.py +2 -0
  12. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/cloud_sdk_client/client.py +56 -12
  13. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/cloud_sdk_client/models.py +212 -37
  14. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/cloud_sdk_client/utils.py +14 -0
  15. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/constants/constants.py +3 -2
  16. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/entities/column_info.py +6 -5
  17. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/entities/feature_column_info.py +2 -1
  18. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/entities/feature_lookup.py +1 -1
  19. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/entities/feature_spec.py +9 -9
  20. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/entities/feature_table_info.py +1 -1
  21. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/entities/function_info.py +2 -1
  22. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/entities/on_demand_column_info.py +2 -1
  23. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/entities/source_data_column_info.py +3 -1
  24. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/entities/training_set.py +6 -6
  25. tencent_wedata_feature_engineering_dev-0.2.3/wedata/common/feast_client/__init__.py +1 -0
  26. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/feast_client/feast_client.py +3 -4
  27. tencent_wedata_feature_engineering_dev-0.2.3/wedata/common/log/__init__.py +1 -0
  28. tencent_wedata_feature_engineering_dev-0.2.3/wedata/common/log/logger.py +44 -0
  29. tencent_wedata_feature_engineering_dev-0.2.3/wedata/common/spark_client/__init__.py +1 -0
  30. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/spark_client/spark_client.py +6 -9
  31. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/utils/common_utils.py +7 -9
  32. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/utils/env_utils.py +31 -10
  33. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/utils/feature_lookup_utils.py +6 -6
  34. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/utils/feature_spec_utils.py +6 -6
  35. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/utils/feature_utils.py +5 -5
  36. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/utils/on_demand_utils.py +5 -4
  37. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/utils/schema_utils.py +1 -1
  38. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/utils/signature_utils.py +4 -4
  39. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/utils/training_set_utils.py +13 -13
  40. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/utils/uc_utils.py +1 -1
  41. tencent_wedata_feature_engineering_dev-0.2.3/wedata/feature_engineering/__init__.py +1 -0
  42. tencent_wedata_feature_engineering_dev-0.2.3/wedata/feature_engineering/client.py +417 -0
  43. tencent_wedata_feature_engineering_dev-0.2.3/wedata/feature_engineering/ml_training_client/ml_training_client.py +569 -0
  44. tencent_wedata_feature_engineering_dev-0.2.3/wedata/feature_engineering/mlflow_model.py +9 -0
  45. tencent_wedata_feature_engineering_dev-0.2.3/wedata/feature_engineering/table_client/table_client.py +548 -0
  46. {tencent-wedata-feature-engineering-dev-0.1.42 → tencent_wedata_feature_engineering_dev-0.2.3}/wedata/feature_store/client.py +13 -16
  47. tencent_wedata_feature_engineering_dev-0.2.3/wedata/feature_store/constants/engine_types.py +12 -0
  48. {tencent-wedata-feature-engineering-dev-0.1.42 → tencent_wedata_feature_engineering_dev-0.2.3}/wedata/feature_store/feature_table_client/feature_table_client.py +98 -108
  49. {tencent-wedata-feature-engineering-dev-0.1.42 → tencent_wedata_feature_engineering_dev-0.2.3}/wedata/feature_store/training_set_client/training_set_client.py +14 -17
  50. tencent_wedata_feature_engineering_dev-0.2.3/wedata/tempo/__init__.py +0 -0
  51. {tencent-wedata-feature-engineering-dev-0.1.42 → tencent_wedata_feature_engineering_dev-0.2.3}/wedata/tempo/interpol.py +2 -2
  52. tencent-wedata-feature-engineering-dev-0.1.42/PKG-INFO +0 -13
  53. tencent-wedata-feature-engineering-dev-0.1.42/tencent_wedata_feature_engineering_dev.egg-info/PKG-INFO +0 -13
  54. tencent-wedata-feature-engineering-dev-0.1.42/tencent_wedata_feature_engineering_dev.egg-info/SOURCES.txt +0 -67
  55. {tencent-wedata-feature-engineering-dev-0.1.42 → tencent_wedata_feature_engineering_dev-0.2.3}/README.md +0 -0
  56. {tencent-wedata-feature-engineering-dev-0.1.42 → tencent_wedata_feature_engineering_dev-0.2.3}/setup.cfg +0 -0
  57. {tencent-wedata-feature-engineering-dev-0.1.42 → tencent_wedata_feature_engineering_dev-0.2.3}/tencent_wedata_feature_engineering_dev.egg-info/dependency_links.txt +0 -0
  58. {tencent-wedata-feature-engineering-dev-0.1.42 → tencent_wedata_feature_engineering_dev-0.2.3}/tencent_wedata_feature_engineering_dev.egg-info/top_level.txt +0 -0
  59. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/__init__.py +0 -0
  60. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store/cloud_sdk_client → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common/constants}/__init__.py +0 -0
  61. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/constants/engine_types.py +0 -0
  62. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store/common → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common/entities}/__init__.py +0 -0
  63. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/entities/environment_variables.py +0 -0
  64. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/entities/feature.py +0 -0
  65. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/entities/feature_function.py +0 -0
  66. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/entities/feature_spec_constants.py +0 -0
  67. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/entities/feature_table.py +0 -0
  68. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata}/common/protos/__init__.py +0 -0
  69. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata}/common/protos/feature_store_pb2.py +0 -0
  70. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store/common/store_config → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common/utils}/__init__.py +0 -0
  71. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/utils/topological_sort.py +0 -0
  72. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store → tencent_wedata_feature_engineering_dev-0.2.3/wedata/common}/utils/validation_utils.py +0 -0
  73. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store/constants → tencent_wedata_feature_engineering_dev-0.2.3/wedata/feature_engineering/ml_training_client}/__init__.py +0 -0
  74. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store/entities → tencent_wedata_feature_engineering_dev-0.2.3/wedata/feature_engineering/table_client}/__init__.py +0 -0
  75. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store/feast_client → tencent_wedata_feature_engineering_dev-0.2.3/wedata/feature_store}/__init__.py +0 -0
  76. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store/feature_table_client → tencent_wedata_feature_engineering_dev-0.2.3/wedata/feature_store/common}/__init__.py +0 -0
  77. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store/spark_client → tencent_wedata_feature_engineering_dev-0.2.3/wedata/feature_store/common/store_config}/__init__.py +0 -0
  78. {tencent-wedata-feature-engineering-dev-0.1.42 → tencent_wedata_feature_engineering_dev-0.2.3}/wedata/feature_store/common/store_config/redis.py +0 -0
  79. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store/training_set_client → tencent_wedata_feature_engineering_dev-0.2.3/wedata/feature_store/constants}/__init__.py +0 -0
  80. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/feature_store/utils → tencent_wedata_feature_engineering_dev-0.2.3/wedata/feature_store/feature_table_client}/__init__.py +0 -0
  81. {tencent-wedata-feature-engineering-dev-0.1.42 → tencent_wedata_feature_engineering_dev-0.2.3}/wedata/feature_store/mlflow_model.py +0 -0
  82. {tencent-wedata-feature-engineering-dev-0.1.42/wedata/tempo → tencent_wedata_feature_engineering_dev-0.2.3/wedata/feature_store/training_set_client}/__init__.py +0 -0
  83. {tencent-wedata-feature-engineering-dev-0.1.42 → tencent_wedata_feature_engineering_dev-0.2.3}/wedata/tempo/intervals.py +0 -0
  84. {tencent-wedata-feature-engineering-dev-0.1.42 → tencent_wedata_feature_engineering_dev-0.2.3}/wedata/tempo/io.py +0 -0
  85. {tencent-wedata-feature-engineering-dev-0.1.42 → tencent_wedata_feature_engineering_dev-0.2.3}/wedata/tempo/ml.py +0 -0
  86. {tencent-wedata-feature-engineering-dev-0.1.42 → tencent_wedata_feature_engineering_dev-0.2.3}/wedata/tempo/resample.py +0 -0
  87. {tencent-wedata-feature-engineering-dev-0.1.42 → tencent_wedata_feature_engineering_dev-0.2.3}/wedata/tempo/tsdf.py +0 -0
  88. {tencent-wedata-feature-engineering-dev-0.1.42 → tencent_wedata_feature_engineering_dev-0.2.3}/wedata/tempo/utils.py +0 -0
@@ -0,0 +1,30 @@
1
+ Metadata-Version: 2.4
2
+ Name: tencent-wedata-feature-engineering-dev
3
+ Version: 0.2.3
4
+ Summary: Wedata Feature Engineering Library Development
5
+ Home-page:
6
+ Author: meahqian
7
+ Author-email:
8
+ License: Apache 2.0
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: License :: OSI Approved :: Apache Software License
11
+ Classifier: Operating System :: OS Independent
12
+ Requires-Python: >=3.7
13
+ Description-Content-Type: text/markdown
14
+ Requires-Dist: pandas>=1.0.0
15
+ Requires-Dist: feast[redis]==0.49.0
16
+ Requires-Dist: grpcio==1.74.0
17
+ Requires-Dist: tencentcloud-sdk-python
18
+ Requires-Dist: ipython
19
+ Provides-Extra: mlflow2
20
+ Requires-Dist: mlflow==2.17.2; extra == "mlflow2"
21
+ Provides-Extra: mlflow3
22
+ Requires-Dist: mlflow==3.1.0; extra == "mlflow3"
23
+ Dynamic: author
24
+ Dynamic: classifier
25
+ Dynamic: description-content-type
26
+ Dynamic: license
27
+ Dynamic: provides-extra
28
+ Dynamic: requires-dist
29
+ Dynamic: requires-python
30
+ Dynamic: summary
@@ -12,11 +12,14 @@ setup(
12
12
  packages=find_packages(include=['wedata', 'wedata.*']),
13
13
  install_requires=[
14
14
  'pandas>=1.0.0',
15
- 'feast[redis]==0.49.0',
16
- 'grpcio==1.74.0',
15
+ 'feast[redis]==0.49.0', 'grpcio==1.74.0',
17
16
  'tencentcloud-sdk-python',
18
17
  'ipython'
19
18
  ],
19
+ extras_require={
20
+ 'mlflow2': ['mlflow==2.17.2',],
21
+ 'mlflow3': ['mlflow==3.1.0'],
22
+ },
20
23
  python_requires='>=3.7',
21
24
  author="meahqian",
22
25
  author_email="",
@@ -0,0 +1,30 @@
1
+ Metadata-Version: 2.4
2
+ Name: tencent-wedata-feature-engineering-dev
3
+ Version: 0.2.3
4
+ Summary: Wedata Feature Engineering Library Development
5
+ Home-page:
6
+ Author: meahqian
7
+ Author-email:
8
+ License: Apache 2.0
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: License :: OSI Approved :: Apache Software License
11
+ Classifier: Operating System :: OS Independent
12
+ Requires-Python: >=3.7
13
+ Description-Content-Type: text/markdown
14
+ Requires-Dist: pandas>=1.0.0
15
+ Requires-Dist: feast[redis]==0.49.0
16
+ Requires-Dist: grpcio==1.74.0
17
+ Requires-Dist: tencentcloud-sdk-python
18
+ Requires-Dist: ipython
19
+ Provides-Extra: mlflow2
20
+ Requires-Dist: mlflow==2.17.2; extra == "mlflow2"
21
+ Provides-Extra: mlflow3
22
+ Requires-Dist: mlflow==3.1.0; extra == "mlflow3"
23
+ Dynamic: author
24
+ Dynamic: classifier
25
+ Dynamic: description-content-type
26
+ Dynamic: license
27
+ Dynamic: provides-extra
28
+ Dynamic: requires-dist
29
+ Dynamic: requires-python
30
+ Dynamic: summary
@@ -0,0 +1,83 @@
1
+ README.md
2
+ setup.py
3
+ tencent_wedata_feature_engineering_dev.egg-info/PKG-INFO
4
+ tencent_wedata_feature_engineering_dev.egg-info/SOURCES.txt
5
+ tencent_wedata_feature_engineering_dev.egg-info/dependency_links.txt
6
+ tencent_wedata_feature_engineering_dev.egg-info/requires.txt
7
+ tencent_wedata_feature_engineering_dev.egg-info/top_level.txt
8
+ tests/test_common_utils.py
9
+ tests/test_feature_store.py
10
+ wedata/__init__.py
11
+ wedata/common/__init__.py
12
+ wedata/common/base_table_client/__init__.py
13
+ wedata/common/base_table_client/base.py
14
+ wedata/common/cloud_sdk_client/__init__.py
15
+ wedata/common/cloud_sdk_client/client.py
16
+ wedata/common/cloud_sdk_client/models.py
17
+ wedata/common/cloud_sdk_client/utils.py
18
+ wedata/common/constants/__init__.py
19
+ wedata/common/constants/constants.py
20
+ wedata/common/constants/engine_types.py
21
+ wedata/common/entities/__init__.py
22
+ wedata/common/entities/column_info.py
23
+ wedata/common/entities/environment_variables.py
24
+ wedata/common/entities/feature.py
25
+ wedata/common/entities/feature_column_info.py
26
+ wedata/common/entities/feature_function.py
27
+ wedata/common/entities/feature_lookup.py
28
+ wedata/common/entities/feature_spec.py
29
+ wedata/common/entities/feature_spec_constants.py
30
+ wedata/common/entities/feature_table.py
31
+ wedata/common/entities/feature_table_info.py
32
+ wedata/common/entities/function_info.py
33
+ wedata/common/entities/on_demand_column_info.py
34
+ wedata/common/entities/source_data_column_info.py
35
+ wedata/common/entities/training_set.py
36
+ wedata/common/feast_client/__init__.py
37
+ wedata/common/feast_client/feast_client.py
38
+ wedata/common/log/__init__.py
39
+ wedata/common/log/logger.py
40
+ wedata/common/protos/__init__.py
41
+ wedata/common/protos/feature_store_pb2.py
42
+ wedata/common/spark_client/__init__.py
43
+ wedata/common/spark_client/spark_client.py
44
+ wedata/common/utils/__init__.py
45
+ wedata/common/utils/common_utils.py
46
+ wedata/common/utils/env_utils.py
47
+ wedata/common/utils/feature_lookup_utils.py
48
+ wedata/common/utils/feature_spec_utils.py
49
+ wedata/common/utils/feature_utils.py
50
+ wedata/common/utils/on_demand_utils.py
51
+ wedata/common/utils/schema_utils.py
52
+ wedata/common/utils/signature_utils.py
53
+ wedata/common/utils/topological_sort.py
54
+ wedata/common/utils/training_set_utils.py
55
+ wedata/common/utils/uc_utils.py
56
+ wedata/common/utils/validation_utils.py
57
+ wedata/feature_engineering/__init__.py
58
+ wedata/feature_engineering/client.py
59
+ wedata/feature_engineering/mlflow_model.py
60
+ wedata/feature_engineering/ml_training_client/__init__.py
61
+ wedata/feature_engineering/ml_training_client/ml_training_client.py
62
+ wedata/feature_engineering/table_client/__init__.py
63
+ wedata/feature_engineering/table_client/table_client.py
64
+ wedata/feature_store/__init__.py
65
+ wedata/feature_store/client.py
66
+ wedata/feature_store/mlflow_model.py
67
+ wedata/feature_store/common/__init__.py
68
+ wedata/feature_store/common/store_config/__init__.py
69
+ wedata/feature_store/common/store_config/redis.py
70
+ wedata/feature_store/constants/__init__.py
71
+ wedata/feature_store/constants/engine_types.py
72
+ wedata/feature_store/feature_table_client/__init__.py
73
+ wedata/feature_store/feature_table_client/feature_table_client.py
74
+ wedata/feature_store/training_set_client/__init__.py
75
+ wedata/feature_store/training_set_client/training_set_client.py
76
+ wedata/tempo/__init__.py
77
+ wedata/tempo/interpol.py
78
+ wedata/tempo/intervals.py
79
+ wedata/tempo/io.py
80
+ wedata/tempo/ml.py
81
+ wedata/tempo/resample.py
82
+ wedata/tempo/tsdf.py
83
+ wedata/tempo/utils.py
@@ -3,3 +3,9 @@ feast[redis]==0.49.0
3
3
  grpcio==1.74.0
4
4
  tencentcloud-sdk-python
5
5
  ipython
6
+
7
+ [mlflow2]
8
+ mlflow==2.17.2
9
+
10
+ [mlflow3]
11
+ mlflow==3.1.0
@@ -0,0 +1,12 @@
1
+ from wedata.common.utils import common_utils
2
+
3
+ import os
4
+ import logging
5
+ import mlflow
6
+ logging.basicConfig(level=logging.ERROR)
7
+
8
def test_build_full_table_name():
    """Smoke-test ``common_utils.build_full_table_name`` with a minimal environment.

    Sets ``WEDATA_FEATURE_STORE_DATABASE`` to an empty string and
    ``QCLOUD_UIN`` to ``"test"``, then calls the helper with the table name
    ``"test"``. The test passes if the call does not raise.
    """
    os.environ["WEDATA_FEATURE_STORE_DATABASE"] = ""
    os.environ["QCLOUD_UIN"] = "test"
    # A stray ``mlflow.sklearn.log_model()`` call with no arguments used to sit
    # here. It raises TypeError (the model argument is required) before the
    # function under test is reached, so it has been dropped.
    common_utils.build_full_table_name("test")
@@ -0,0 +1,388 @@
1
+ # This is a test script for FeatureStoreClient
2
+ from datetime import date
3
+
4
+ import pandas as pd
5
+ from pyspark.sql import SparkSession
6
+ from sklearn.ensemble import RandomForestClassifier
7
+
8
+ import mlflow.sklearn
9
+
10
+ from wedata.feature_store.client import FeatureStoreClient
11
+ from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DoubleType, DateType
12
+
13
+ from wedata.common.entities.feature_lookup import FeatureLookup
14
+ from wedata.common.entities.training_set import TrainingSet
15
+
16
+
17
+ # 创建FeatureStoreClient实例
18
def create_client() -> FeatureStoreClient:
    """Build a Hive-enabled Spark session with Delta settings and wrap it in a client.

    Prints the session's current catalog before returning.

    Returns:
        A ``FeatureStoreClient`` constructed from the new (or existing) session.
    """
    delta_settings = (
        ("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension"),
        ("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog"),
        ("spark.jars.packages", "io.delta:delta-core_2.12:2.4.0"),
    )

    builder = SparkSession.builder.appName("FeatureStoreDemo")
    for option, value in delta_settings:
        builder = builder.config(option, value)
    session = builder.enableHiveSupport().getOrCreate()

    print(session.catalog.currentCatalog())

    return FeatureStoreClient(session)
32
+
33
+ # 创建特征表
34
def create_table(client: FeatureStoreClient):
    """Create the ``user_features`` and ``product_features`` demo tables.

    For each table a three-row DataFrame is built from literal data, printed
    with ``display``, and registered through ``client.create_table``. The user
    table is partitioned by ``member_since``.

    Args:
        client: Feature store client whose ``spark`` session builds the DataFrames.
    """
    # ---- user features -------------------------------------------------
    # Row layout: user_id, age, gender, avg_purchase, member_since
    user_rows = [
        (1001, 25, "F", 120.5, date(2020, 5, 15)),
        (1002, 30, "M", 200.0, date(2019, 3, 10)),
        (1003, 35, "F", 180.3, date(2021, 1, 20)),
    ]
    # (column name, Spark type, nullable, column comment)
    user_columns = [
        ("user_id", IntegerType(), False, "用户唯一标识ID"),
        ("age", IntegerType(), True, "用户年龄"),
        ("gender", StringType(), True, "用户性别(F-女性,M-男性)"),
        ("avg_purchase", DoubleType(), True, "用户平均消费金额"),
        ("member_since", DateType(), True, "用户注册日期"),
    ]
    user_schema = StructType([
        StructField(column, dtype, nullable, metadata={"comment": note})
        for column, dtype, nullable, note in user_columns
    ])

    user_df = client.spark.createDataFrame(user_rows, user_schema)
    client.spark.sql("show tables").show()
    display(user_df)

    user_tags = {
        "create_by": "tencent",
        "sensitivity": "internal",
    }
    client.create_table(
        name="user_features",
        primary_keys=["user_id"],
        df=user_df,
        partition_columns=["member_since"],  # partition by registration date
        description="用户基本特征和消费行为特征",
        tags=user_tags,
    )

    # ---- product features ----------------------------------------------
    product_rows = [
        (5001, "电子", 599.0, 0.85, date(2024, 1, 1)),
        (5002, "服装", 199.0, 0.92, date(2023, 11, 15)),
        (5003, "家居", 299.0, 0.78, date(2024, 2, 20)),
    ]
    # The product schema carries no column comments in this function.
    product_columns = [
        ("product_id", IntegerType(), False),
        ("category", StringType(), True),
        ("price", DoubleType(), True),
        ("popularity", DoubleType(), True),
        ("release_date", DateType(), True),
    ]
    product_schema = StructType([
        StructField(column, dtype, nullable)
        for column, dtype, nullable in product_columns
    ])

    product_df = client.spark.createDataFrame(product_rows, product_schema)
    display(product_df)

    product_tags = {
        "feature_table": "true",
        "sensitivity": "internal",
    }
    client.create_table(
        name="product_features",
        primary_keys=["product_id"],
        df=product_df,
        description="商品基本属性和受欢迎程度",
        tags=product_tags,
    )
100
+
101
+
102
+ # 追加写入数据
103
def append_data(client: FeatureStoreClient):
    """Append three extra rows to each of the two demo feature tables.

    Each DataFrame is printed with ``display`` and then written through
    ``client.write_table`` with ``mode="append"``.

    Args:
        client: Feature store client whose ``spark`` session builds the DataFrames.
    """
    # ---- user features -------------------------------------------------
    user_rows = [
        (1004, 45, "F", 120.5, date(2020, 5, 15)),
        (1005, 55, "M", 200.0, date(2019, 3, 10)),
        (1006, 65, "F", 180.3, date(2021, 1, 20)),
    ]
    # (column name, Spark type, nullable, column comment)
    user_columns = [
        ("user_id", IntegerType(), False, "用户唯一标识ID"),
        ("age", IntegerType(), True, "用户年龄"),
        ("gender", StringType(), True, "用户性别(F-女性,M-男性)"),
        ("avg_purchase", DoubleType(), True, "用户平均消费金额"),
        ("member_since", DateType(), True, "用户注册日期"),
    ]
    user_schema = StructType([
        StructField(column, dtype, nullable, metadata={"comment": note})
        for column, dtype, nullable, note in user_columns
    ])

    user_df = client.spark.createDataFrame(user_rows, user_schema)
    display(user_df)
    client.write_table(name="user_features", df=user_df, mode="append")

    # ---- product features ----------------------------------------------
    product_rows = [
        (5007, "食品", 599.0, 0.85, date(2024, 1, 1)),
        (5008, "玩具", 199.0, 0.92, date(2023, 11, 15)),
        (5009, "电脑", 299.0, 0.78, date(2024, 2, 20)),
    ]
    product_columns = [
        ("product_id", IntegerType(), False, "商品唯一标识ID"),
        ("category", StringType(), True, "商品类别"),
        ("price", DoubleType(), True, "商品价格(元)"),
        ("popularity", DoubleType(), True, "商品受欢迎程度(0-1)"),
        ("release_date", DateType(), True, "商品发布日期"),
    ]
    product_schema = StructType([
        StructField(column, dtype, nullable, metadata={"comment": note})
        for column, dtype, nullable, note in product_columns
    ])

    product_df = client.spark.createDataFrame(product_rows, product_schema)
    display(product_df)
    client.write_table(name="product_features", df=product_df, mode="append")
151
+
152
+ # 读取特征表数据
153
def read_table(client: FeatureStoreClient):
    """Read the user and product feature tables and print each with ``display``.

    Args:
        client: Feature store client used to read the tables.
    """
    # User table first, then product table.
    for table_name in ("user_features", "product_features"):
        table_df = client.read_table(table_name)
        display(table_df)
162
+
163
+ # 获取特征表元数据
164
def get_table(client: FeatureStoreClient):
    """Fetch the ``user_features`` table object from the client and print it.

    Args:
        client: Feature store client used for the lookup.
    """
    print(client.get_table(name="user_features"))
167
+
168
+
169
+ # 创建训练集
170
def create_training_set(client: FeatureStoreClient) -> TrainingSet:
    """Build a training set from sample orders plus user and product lookups.

    A three-row order DataFrame is the base data. Features are looked up from
    ``user_features`` (by ``user_id``) and ``product_features`` (by
    ``product_id``). ``is_returned`` is the label and ``order_id`` is
    excluded. Both the order DataFrame and the loaded training DataFrame are
    printed with ``display``.

    Args:
        client: Feature store client used to build the training set.

    Returns:
        The object returned by ``client.create_training_set``.
    """
    # Row layout: order_id, user_id, product_id, order_date, quantity, is_returned
    order_rows = [
        (9001, 1001, 5001, date(2025, 3, 1), 1, 0),
        (9002, 1002, 5002, date(2025, 3, 2), 2, 1),
        (9003, 1003, 5003, date(2025, 3, 3), 1, 0),
    ]
    # (column name, Spark type, nullable, column comment)
    order_columns = [
        ("order_id", IntegerType(), False, "订单唯一标识ID"),
        ("user_id", IntegerType(), True, "用户ID"),
        ("product_id", IntegerType(), True, "商品ID"),
        ("order_date", DateType(), True, "订单日期"),
        ("quantity", IntegerType(), True, "购买数量"),
        ("is_returned", IntegerType(), True, "是否退货(0-未退货,1-已退货)"),
    ]
    order_schema = StructType([
        StructField(column, dtype, nullable, metadata={"comment": note})
        for column, dtype, nullable, note in order_columns
    ])

    order_df = client.spark.createDataFrame(order_rows, order_schema)
    display(order_df)

    # One lookup per feature table, each joined on its own key.
    lookups = [
        FeatureLookup(
            table_name="user_features",
            feature_names=["age", "gender", "avg_purchase"],
            lookup_key="user_id",
        ),
        FeatureLookup(
            table_name="product_features",
            feature_names=["category", "price", "popularity"],
            lookup_key="product_id",
        ),
    ]

    result = client.create_training_set(
        df=order_df,
        feature_lookups=lookups,
        label="is_returned",
        exclude_columns=["order_id"],
    )

    # Materialise the joined DataFrame once so it can be inspected.
    display(result.load_df())

    return result
224
+
225
+
226
+ # 查看df中数据
227
def display(df, num_rows=20, truncate=True):
    """Print a DataFrame's schema, a sample of its rows, and its total row count.

    Args:
        df: Spark DataFrame to print. It must provide ``printSchema``,
            ``show`` and ``count``.
        num_rows: Number of rows passed to ``df.show``. Defaults to 20.
        truncate: Truncation flag passed to ``df.show``. Defaults to True.
    """
    # Schema
    print("=== 表结构 ===")
    df.printSchema()

    # Sample rows
    print("\n=== 数据示例 ===")
    df.show(num_rows, truncate)

    # Row count
    print(f"\n总行数: {df.count()}")
247
+
248
+
249
def log_model(client: FeatureStoreClient,
              training_set: TrainingSet
              ):
    """Train a random forest on engineered features and log it via the client.

    NOTE(review): a second ``log_model`` is defined later in this file, so at
    call time this version is shadowed by that definition.

    Args:
        client: Feature store client whose ``log_model`` records the model.
        training_set: Training set whose ``load_df()`` supplies the data.
    """
    classifier = RandomForestClassifier(
        n_estimators=100,
        random_state=42,  # fixed seed for reproducibility
    )

    frame = training_set.load_df().toPandas()

    # Categorical features: binary-encode gender, one-hot encode category.
    gender_codes = {'F': 0, 'M': 1}
    frame['gender'] = frame['gender'].map(gender_codes)
    frame = pd.get_dummies(frame, columns=['category'])

    # Date feature: days between the order date and a fixed reference date.
    reference_date = pd.to_datetime('2025-04-19')
    order_dates = pd.to_datetime(frame['order_date'])
    frame['order_days'] = (reference_date - order_dates).dt.days
    frame = frame.drop('order_date', axis=1)

    # Interaction feature: price * quantity.
    frame['total_amount'] = frame['price'] * frame['quantity']

    # Split label from features and fit.
    labels = frame["is_returned"]
    features = frame.drop("is_returned", axis=1)
    classifier.fit(features, labels)

    # Record the fitted model inside an MLflow run.
    with mlflow.start_run():
        client.log_model(
            model=classifier,
            artifact_path="return_prediction_model",
            flavor=mlflow.sklearn,
            training_set=training_set,
            registered_model_name="product_return_prediction_model"
        )
290
+
291
def log_model(client: FeatureStoreClient,
              training_set: TrainingSet
              ):
    """Train a default random forest on five basic features and log it.

    The features are ``age``, ``gender`` (mapped F->0, M->1),
    ``avg_purchase``, ``price`` and ``popularity``. The label is
    ``is_returned``.

    Args:
        client: Feature store client whose ``log_model`` records the model.
        training_set: Training set whose ``load_df()`` supplies the data.
    """
    frame = training_set.load_df().toPandas()

    # Only the minimum preprocessing: binary-encode gender.
    gender_codes = {'F': 0, 'M': 1}
    frame['gender'] = frame['gender'].map(gender_codes)

    feature_columns = ['age', 'gender', 'avg_purchase', 'price', 'popularity']
    features = frame[feature_columns]
    labels = frame["is_returned"]

    # Default hyper-parameters apart from the fixed seed.
    classifier = RandomForestClassifier(random_state=42)
    classifier.fit(features, labels)

    # Record the fitted model inside an MLflow run.
    with mlflow.start_run():
        client.log_model(
            model=classifier,
            artifact_path="return_prediction_model",
            flavor=mlflow.sklearn,
            training_set=training_set,
            registered_model_name="product_return_prediction_model",
        )
327
+
328
def load_model(client: FeatureStoreClient):
    """Load a logged model by its hard-coded run URI and predict on three sample rows.

    Args:
        client: Feature store client whose ``spark`` session builds the sample data.

    Returns:
        Whatever the loaded pyfunc model's ``predict`` returns for the sample rows.
    """
    import mlflow
    import logging

    logging.basicConfig(level=logging.INFO)
    log = logging.getLogger(__name__)

    # Hard-coded run URI; the original author notes it should come from
    # configuration or an environment variable.
    model_uri = 'runs:/7ef2294070824daaadec065e1640211f/return_prediction_model'

    log.info("正在加载MLflow模型...")
    model = mlflow.pyfunc.load_model(model_uri)

    # Sample-data schema: (column name, Spark type, column comment).
    # "category" is deliberately left out, as in the disabled field upstream.
    sample_columns = [
        ("age", IntegerType(), "用户年龄"),
        ("gender", StringType(), "用户性别(F-女性,M-男性)"),
        ("avg_purchase", DoubleType(), "用户平均消费金额"),
        ("price", DoubleType(), "商品价格(元)"),
        ("popularity", DoubleType(), "商品受欢迎程度(0-1)"),
    ]
    sample_schema = StructType([
        StructField(column, dtype, True, metadata={"comment": note})
        for column, dtype, note in sample_columns
    ])

    sample_rows = [
        (21, "M", 100.0, 500.0, 0.5),
        (25, "F", 500.0, 100.0, 0.9),
        (31, "M", 1000.0, 100.0, 0.9),
    ]

    spark_df = client.spark.createDataFrame(sample_rows, sample_schema)

    # Convert to pandas, fix the column order, and encode gender as in training.
    pd_df = spark_df.toPandas()
    pd_df = pd_df[['age', 'gender', 'avg_purchase', 'price', 'popularity']]
    pd_df['gender'] = pd_df['gender'].map({'F': 0, 'M': 1})

    log.info("正在执行预测...")
    predictions = model.predict(pd_df)

    print("预测结果:", predictions)
    return predictions
374
+
375
+
376
+
377
+ # Press the green button in the gutter to run the script.
378
if __name__ == '__main__':
    client = create_client()
    # Table setup and inspection steps are disabled; re-enable as needed:
    # create_table(client)
    # append_data(client)
    # read_table(client)
    # get_table(client)
    training_set = create_training_set(client)
    log_model(client, training_set)
386
+
387
+
388
+
@@ -2,7 +2,7 @@
2
2
  WeData Feature Engineering
3
3
  A toolkit for automated feature engineering
4
4
  """
5
- from wedata.feature_store.constants import constants
5
+ from wedata.common.constants import constants
6
6
 
7
7
  __version__ = f"{constants.FEATURE_LOOKUP_CLIENT_MAJOR_VERSION}"
8
8
 
@@ -0,0 +1 @@
1
+ from .base import AbstractBaseTableClient
@@ -0,0 +1,58 @@
1
+
2
+ from typing import Union, List, Optional, Sequence, Any
3
+ from pyspark.sql import DataFrame
4
+ from pyspark.sql.types import StructType
5
+
6
+
7
class AbstractBaseTableClient:
    """Static helpers shared by table clients.

    Covers parameter normalisation, schema and key validation, SQL value
    escaping, and element type checks.
    """

    @staticmethod
    def _normalize_params(
            param: Optional[Union[str, Sequence[str]]],
            default_type: type = list
    ) -> list:
        """Normalise a scalar-or-sequence argument into a list.

        Args:
            param: ``None``, a single string, or a sequence of strings.
            default_type: Factory called with no arguments when ``param`` is
                ``None``. Defaults to ``list``.

        Returns:
            ``default_type()`` for ``None``; a one-element list for a single
            string or any other non-sequence value; otherwise ``list(param)``.
        """
        if param is None:
            return default_type()
        # ``str`` is itself a Sequence, so it must be checked first; otherwise
        # "user_id" would be exploded into a list of single characters.
        if isinstance(param, str):
            return [param]
        return list(param) if isinstance(param, Sequence) else [param]

    @staticmethod
    def _validate_schema(df: "DataFrame", schema: "StructType"):
        """Validate that a DataFrame and/or schema was supplied and that they agree.

        Args:
            df: DataFrame to check, or ``None``.
            schema: Expected schema, or ``None``.

        Raises:
            ValueError: If both arguments are ``None``, or if both are given
                and ``df.schema`` differs from ``schema``.
        """
        # At least one of the two must be provided.
        if df is None and schema is None:
            raise ValueError("Either DataFrame or schema must be provided")

        # When both are present they must match exactly.
        if df is not None and schema is not None:
            df_schema = df.schema
            if df_schema != schema:
                # Report differing field names; if the names match, the
                # mismatch lies elsewhere and the message falls back to
                # 'field type mismatch'.
                diff_fields = set(df_schema.fieldNames()).symmetric_difference(set(schema.fieldNames()))
                raise ValueError(
                    f"DataFrame schema does not match. Differences: "
                    f"{diff_fields if diff_fields else 'field type mismatch'}"
                )

    @staticmethod
    def _validate_key_conflicts(primary_keys: List[str], timestamp_keys: str):
        """Ensure the timestamp key is not also a primary key.

        Raises:
            ValueError: If ``timestamp_keys`` is contained in ``primary_keys``.
        """
        if timestamp_keys in primary_keys:
            raise ValueError(f"Timestamp keys conflict with primary keys: {timestamp_keys}")

    @staticmethod
    def _validate_key_exists(primary_keys: List[str], timestamp_keys: str):
        """Ensure both the primary keys and the timestamp key are non-empty.

        Raises:
            ValueError: If either argument is falsy.
        """
        if not primary_keys:
            raise ValueError("Primary keys cannot be empty")
        if not timestamp_keys:
            raise ValueError("Timestamp keys cannot be empty")

    @staticmethod
    def _escape_sql_value(value: str) -> str:
        """Escape single quotes in a SQL value by doubling them."""
        return value.replace("'", "''")

    @staticmethod
    def _check_sequence_element_type(sequence: Sequence[Any], element_type: type) -> bool:
        """Return True if every element of ``sequence`` is an instance of ``element_type``.

        An empty sequence yields True.
        """
        return all(isinstance(element, element_type) for element in sequence)
@@ -0,0 +1,2 @@
1
+ from .client import FeatureCloudSDK
2
+ from . import models