upgini 1.1.262a3250.post4__tar.gz → 1.2.31__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini-1.2.31/.gitignore +157 -0
- {upgini-1.1.262a3250.post4/src/upgini.egg-info → upgini-1.2.31}/PKG-INFO +43 -30
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/README.md +24 -11
- upgini-1.2.31/pyproject.toml +124 -0
- upgini-1.2.31/src/upgini/__about__.py +1 -0
- upgini-1.2.31/src/upgini/__init__.py +5 -0
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/ads.py +6 -2
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/ads_management/ads_manager.py +4 -2
- upgini-1.2.31/src/upgini/autofe/all_operands.py +87 -0
- upgini-1.2.31/src/upgini/autofe/binary.py +237 -0
- upgini-1.2.31/src/upgini/autofe/date.py +289 -0
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/autofe/feature.py +103 -20
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/autofe/groupby.py +25 -23
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/autofe/operand.py +13 -9
- upgini-1.2.31/src/upgini/autofe/unary.py +155 -0
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/autofe/vector.py +10 -8
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/data_source/data_source_publisher.py +142 -9
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/dataset.py +54 -389
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/errors.py +1 -1
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/features_enricher.py +1033 -545
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/http.py +29 -17
- upgini-1.2.31/src/upgini/lazy_import.py +35 -0
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/mdc/__init__.py +1 -3
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/mdc/context.py +4 -6
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/metadata.py +87 -59
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/metrics.py +305 -111
- upgini-1.2.31/src/upgini/normalizer/normalize_utils.py +197 -0
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/resource_bundle/__init__.py +5 -5
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/resource_bundle/strings.properties +68 -48
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/sampler/base.py +1 -4
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/sampler/random_under_sampler.py +2 -5
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/search_task.py +14 -8
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/spinner.py +1 -1
- upgini-1.2.31/src/upgini/utils/Roboto-Regular.ttf +0 -0
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/utils/__init__.py +3 -2
- upgini-1.2.31/src/upgini/utils/base_search_key_detector.py +27 -0
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/utils/blocked_time_series.py +4 -2
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/utils/country_utils.py +18 -2
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/utils/custom_loss_utils.py +42 -38
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/utils/cv_utils.py +2 -2
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/utils/datetime_utils.py +145 -35
- upgini-1.2.31/src/upgini/utils/deduplicate_utils.py +208 -0
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/utils/display_utils.py +46 -15
- upgini-1.2.31/src/upgini/utils/email_utils.py +132 -0
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/utils/fallback_progress_bar.py +1 -1
- upgini-1.2.31/src/upgini/utils/feature_info.py +172 -0
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/utils/features_validator.py +35 -20
- upgini-1.2.31/src/upgini/utils/ip_utils.py +152 -0
- upgini-1.1.262a3250.post4/src/upgini/normalizer/phone_normalizer.py → upgini-1.2.31/src/upgini/utils/phone_utils.py +41 -27
- upgini-1.2.31/src/upgini/utils/postal_code_utils.py +45 -0
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/utils/progress_bar.py +1 -1
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/utils/sklearn_ext.py +43 -34
- upgini-1.2.31/src/upgini/utils/target_utils.py +253 -0
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/utils/track_info.py +27 -15
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/utils/warning_counter.py +1 -0
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/version_validator.py +10 -6
- upgini-1.1.262a3250.post4/PKG-INFO +0 -844
- upgini-1.1.262a3250.post4/pyproject.toml +0 -10
- upgini-1.1.262a3250.post4/setup.cfg +0 -4
- upgini-1.1.262a3250.post4/setup.py +0 -104
- upgini-1.1.262a3250.post4/src/upgini/__init__.py +0 -21
- upgini-1.1.262a3250.post4/src/upgini/autofe/all_operands.py +0 -46
- upgini-1.1.262a3250.post4/src/upgini/autofe/binary.py +0 -133
- upgini-1.1.262a3250.post4/src/upgini/autofe/date.py +0 -51
- upgini-1.1.262a3250.post4/src/upgini/autofe/unary.py +0 -112
- upgini-1.1.262a3250.post4/src/upgini/utils/base_search_key_detector.py +0 -25
- upgini-1.1.262a3250.post4/src/upgini/utils/deduplicate_utils.py +0 -142
- upgini-1.1.262a3250.post4/src/upgini/utils/email_utils.py +0 -94
- upgini-1.1.262a3250.post4/src/upgini/utils/ip_utils.py +0 -53
- upgini-1.1.262a3250.post4/src/upgini/utils/phone_utils.py +0 -11
- upgini-1.1.262a3250.post4/src/upgini/utils/postal_code_utils.py +0 -11
- upgini-1.1.262a3250.post4/src/upgini/utils/target_utils.py +0 -183
- upgini-1.1.262a3250.post4/src/upgini.egg-info/SOURCES.txt +0 -82
- upgini-1.1.262a3250.post4/src/upgini.egg-info/dependency_links.txt +0 -1
- upgini-1.1.262a3250.post4/src/upgini.egg-info/requires.txt +0 -13
- upgini-1.1.262a3250.post4/src/upgini.egg-info/top_level.txt +0 -1
- upgini-1.1.262a3250.post4/tests/test_autofe_operands.py +0 -27
- upgini-1.1.262a3250.post4/tests/test_binary_dataset.py +0 -47
- upgini-1.1.262a3250.post4/tests/test_blocked_time_series.py +0 -80
- upgini-1.1.262a3250.post4/tests/test_categorical_dataset.py +0 -44
- upgini-1.1.262a3250.post4/tests/test_continuous_dataset.py +0 -47
- upgini-1.1.262a3250.post4/tests/test_country_utils.py +0 -51
- upgini-1.1.262a3250.post4/tests/test_custom_loss_utils.py +0 -50
- upgini-1.1.262a3250.post4/tests/test_datetime_utils.py +0 -185
- upgini-1.1.262a3250.post4/tests/test_email_utils.py +0 -99
- upgini-1.1.262a3250.post4/tests/test_etalon_validation.py +0 -767
- upgini-1.1.262a3250.post4/tests/test_features_enricher.py +0 -2629
- upgini-1.1.262a3250.post4/tests/test_metrics.py +0 -1364
- upgini-1.1.262a3250.post4/tests/test_phone_utils.py +0 -31
- upgini-1.1.262a3250.post4/tests/test_postal_code_utils.py +0 -31
- upgini-1.1.262a3250.post4/tests/test_target_utils.py +0 -134
- upgini-1.1.262a3250.post4/tests/test_widget.py +0 -432
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/LICENSE +0 -0
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.1.262a3250.post4 → upgini-1.2.31}/src/upgini/utils/format.py +0 -0
upgini-1.2.31/.gitignore
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
pip-wheel-metadata/
|
|
24
|
+
share/python-wheels/
|
|
25
|
+
*.egg-info/
|
|
26
|
+
.installed.cfg
|
|
27
|
+
*.egg
|
|
28
|
+
MANIFEST
|
|
29
|
+
|
|
30
|
+
# PyInstaller
|
|
31
|
+
# Usually these files are written by a python script from a template
|
|
32
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
33
|
+
*.manifest
|
|
34
|
+
*.spec
|
|
35
|
+
|
|
36
|
+
# Installer logs
|
|
37
|
+
pip-log.txt
|
|
38
|
+
pip-delete-this-directory.txt
|
|
39
|
+
|
|
40
|
+
# Unit test / coverage reports
|
|
41
|
+
htmlcov/
|
|
42
|
+
.tox/
|
|
43
|
+
.nox/
|
|
44
|
+
.coverage
|
|
45
|
+
.coverage.*
|
|
46
|
+
.cache
|
|
47
|
+
nosetests.xml
|
|
48
|
+
coverage.xml
|
|
49
|
+
*.cover
|
|
50
|
+
*.py,cover
|
|
51
|
+
.hypothesis/
|
|
52
|
+
.pytest_cache/
|
|
53
|
+
|
|
54
|
+
# Translations
|
|
55
|
+
*.mo
|
|
56
|
+
*.pot
|
|
57
|
+
|
|
58
|
+
# Django stuff:
|
|
59
|
+
*.log
|
|
60
|
+
local_settings.py
|
|
61
|
+
db.sqlite3
|
|
62
|
+
db.sqlite3-journal
|
|
63
|
+
|
|
64
|
+
# Flask stuff:
|
|
65
|
+
instance/
|
|
66
|
+
.webassets-cache
|
|
67
|
+
|
|
68
|
+
# Scrapy stuff:
|
|
69
|
+
.scrapy
|
|
70
|
+
|
|
71
|
+
# Sphinx documentation
|
|
72
|
+
docs/_build/
|
|
73
|
+
|
|
74
|
+
# PyBuilder
|
|
75
|
+
target/
|
|
76
|
+
|
|
77
|
+
# Jupyter Notebook
|
|
78
|
+
.ipynb_checkpoints
|
|
79
|
+
|
|
80
|
+
# IPython
|
|
81
|
+
profile_default/
|
|
82
|
+
ipython_config.py
|
|
83
|
+
|
|
84
|
+
# pyenv
|
|
85
|
+
.python-version
|
|
86
|
+
|
|
87
|
+
# pipenv
|
|
88
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
89
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
90
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
91
|
+
# install all needed dependencies.
|
|
92
|
+
#Pipfile.lock
|
|
93
|
+
|
|
94
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
|
95
|
+
__pypackages__/
|
|
96
|
+
|
|
97
|
+
# Celery stuff
|
|
98
|
+
celerybeat-schedule
|
|
99
|
+
celerybeat.pid
|
|
100
|
+
|
|
101
|
+
# SageMath parsed files
|
|
102
|
+
*.sage.py
|
|
103
|
+
|
|
104
|
+
# Environments
|
|
105
|
+
.env
|
|
106
|
+
.venv
|
|
107
|
+
env/
|
|
108
|
+
env8/
|
|
109
|
+
env9/
|
|
110
|
+
env10/
|
|
111
|
+
.env10/
|
|
112
|
+
.env310/
|
|
113
|
+
env11/
|
|
114
|
+
venv/
|
|
115
|
+
ENV/
|
|
116
|
+
env.bak/
|
|
117
|
+
venv.bak/
|
|
118
|
+
|
|
119
|
+
# Spyder project settings
|
|
120
|
+
.spyderproject
|
|
121
|
+
.spyproject
|
|
122
|
+
|
|
123
|
+
# Rope project settings
|
|
124
|
+
.ropeproject
|
|
125
|
+
|
|
126
|
+
# mkdocs documentation
|
|
127
|
+
/site
|
|
128
|
+
|
|
129
|
+
# mypy
|
|
130
|
+
.mypy_cache/
|
|
131
|
+
.dmypy.json
|
|
132
|
+
dmypy.json
|
|
133
|
+
|
|
134
|
+
# Pyre type checker
|
|
135
|
+
.pyre/
|
|
136
|
+
|
|
137
|
+
# IDE
|
|
138
|
+
.vscode/
|
|
139
|
+
.idea/
|
|
140
|
+
|
|
141
|
+
# macOS
|
|
142
|
+
.DS_Store
|
|
143
|
+
|
|
144
|
+
# Other
|
|
145
|
+
.cache/
|
|
146
|
+
activate_venv.sh
|
|
147
|
+
test-results/
|
|
148
|
+
test_notebooks/
|
|
149
|
+
publish.sh
|
|
150
|
+
catboost_info/
|
|
151
|
+
build/
|
|
152
|
+
playgroung.ipynb
|
|
153
|
+
fingerprint.js
|
|
154
|
+
envVars.txt
|
|
155
|
+
.ruff_cache
|
|
156
|
+
.jupyter
|
|
157
|
+
*.excalidraw
|
|
@@ -1,14 +1,13 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.1.262a3250.post4
|
|
3
|
+
Version: 1.2.31
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
|
-
Home-page: https://upgini.com/
|
|
6
|
-
Author: Upgini Developers
|
|
7
|
-
Author-email: madewithlove@upgini.com
|
|
8
|
-
License: BSD 3-Clause License
|
|
9
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
|
+
Project-URL: Homepage, https://upgini.com/
|
|
10
7
|
Project-URL: Source, https://github.com/upgini/upgini
|
|
11
|
-
|
|
8
|
+
Author-email: Upgini Developers <madewithlove@upgini.com>
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: automl,data mining,data science,data search,machine learning
|
|
12
11
|
Classifier: Development Status :: 5 - Production/Stable
|
|
13
12
|
Classifier: Intended Audience :: Customer Service
|
|
14
13
|
Classifier: Intended Audience :: Developers
|
|
@@ -23,22 +22,23 @@ Classifier: Programming Language :: Python :: 3.9
|
|
|
23
22
|
Classifier: Programming Language :: Python :: 3.10
|
|
24
23
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
25
24
|
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
26
|
-
Requires-Python:
|
|
27
|
-
|
|
28
|
-
|
|
25
|
+
Requires-Python: <3.12,>=3.8
|
|
26
|
+
Requires-Dist: catboost>=1.0.3
|
|
27
|
+
Requires-Dist: fastparquet>=0.8.1
|
|
28
|
+
Requires-Dist: ipywidgets>=8.1.0
|
|
29
|
+
Requires-Dist: jarowinkler>=2.0.0
|
|
30
|
+
Requires-Dist: levenshtein>=0.25.1
|
|
31
|
+
Requires-Dist: numpy<=1.26.4,>=1.19.0
|
|
32
|
+
Requires-Dist: pandas<3.0.0,>=1.1.0
|
|
33
|
+
Requires-Dist: pydantic<3.0.0,>1.0.0
|
|
34
|
+
Requires-Dist: pyjwt>=2.8.0
|
|
35
|
+
Requires-Dist: python-bidi==0.4.2
|
|
29
36
|
Requires-Dist: python-dateutil>=2.8.0
|
|
37
|
+
Requires-Dist: python-json-logger>=2.0.2
|
|
30
38
|
Requires-Dist: requests>=2.8.0
|
|
31
|
-
Requires-Dist: pandas<2.0.0,>=1.1.0
|
|
32
|
-
Requires-Dist: numpy>=1.19.0
|
|
33
39
|
Requires-Dist: scikit-learn>=1.3.0
|
|
34
|
-
Requires-Dist: pydantic<2.0.0,>=1.8.2
|
|
35
|
-
Requires-Dist: fastparquet>=0.8.1
|
|
36
|
-
Requires-Dist: python-json-logger>=2.0.2
|
|
37
|
-
Requires-Dist: catboost>=1.0.3
|
|
38
|
-
Requires-Dist: lightgbm>=3.3.2
|
|
39
|
-
Requires-Dist: pyjwt>=2.8.0
|
|
40
40
|
Requires-Dist: xhtml2pdf==0.2.11
|
|
41
|
-
|
|
41
|
+
Description-Content-Type: text/markdown
|
|
42
42
|
|
|
43
43
|
|
|
44
44
|
<!-- <h2 align="center"> <a href="https://upgini.com/">Upgini</a> : low-code feature search and enrichment library for machine learning </h2> -->
|
|
@@ -132,7 +132,7 @@ Requires-Dist: ipywidgets>=8.1.0
|
|
|
132
132
|
|Consumer Confidence index| 44 |22|-|Monthly|date, country|No
|
|
133
133
|
|World economic indicators|191 |41|-|Monthly|date, country|No
|
|
134
134
|
|Markets data|-|17|-|Monthly|date, datetime|No
|
|
135
|
-
|World mobile & fixed broadband network coverage and
|
|
135
|
+
|World mobile & fixed broadband network coverage and performance |167|-|3|Monthly|country, postal/ZIP code|No
|
|
136
136
|
|World demographic data |90|-|2|Annual|country, postal/ZIP code|No
|
|
137
137
|
|World house prices |44|-|3|Annual|country, postal/ZIP code|No
|
|
138
138
|
|Public social media profile data |104|-|-|Monthly|date, email/HEM, phone |Yes
|
|
@@ -145,7 +145,7 @@ Requires-Dist: ipywidgets>=8.1.0
|
|
|
145
145
|
|
|
146
146
|
## 💼 Tutorials
|
|
147
147
|
|
|
148
|
-
### [Search of relevant external features & Automated feature generation for Salary
|
|
148
|
+
### [Search of relevant external features & Automated feature generation for Salary prediction task (use as a template)](https://github.com/upgini/upgini/blob/main/notebooks/Upgini_Features_search%26generation.ipynb)
|
|
149
149
|
|
|
150
150
|
* The goal is to predict salary for data science job posting based on information about employer and job description.
|
|
151
151
|
* Following this guide, you'll learn how to **search & auto generate new relevant features with Upgini library**
|
|
@@ -259,7 +259,9 @@ We do dataset verification and cleaning under the hood, but still there are some
|
|
|
259
259
|
*Search keys* columns will be used to match records from all potential external data sources / features.
|
|
260
260
|
Define one or multiple columns as search keys with `FeaturesEnricher` class initialization.
|
|
261
261
|
```python
|
|
262
|
-
from upgini import FeaturesEnricher
|
|
262
|
+
from upgini.features_enricher import FeaturesEnricher
|
|
263
|
+
from upgini.metadata import SearchKey
|
|
264
|
+
|
|
263
265
|
enricher = FeaturesEnricher(
|
|
264
266
|
search_keys={
|
|
265
267
|
"subscription_activation_date": SearchKey.DATE,
|
|
@@ -345,7 +347,9 @@ enricher = FeaturesEnricher(
|
|
|
345
347
|
|
|
346
348
|
For the meaning types <tt>SearchKey.DATE</tt>/<tt>SearchKey.DATETIME</tt> with dtypes <tt>object</tt> or <tt>string</tt> you have to clarify date/datetime format by passing <tt>date_format</tt> parameter to `FeaturesEnricher`. For example:
|
|
347
349
|
```python
|
|
348
|
-
from upgini import FeaturesEnricher
|
|
350
|
+
from upgini.features_enricher import FeaturesEnricher
|
|
351
|
+
from upgini.metadata import SearchKey
|
|
352
|
+
|
|
349
353
|
enricher = FeaturesEnricher(
|
|
350
354
|
search_keys={
|
|
351
355
|
"subscription_activation_date": SearchKey.DATE,
|
|
@@ -366,7 +370,9 @@ df["date"] = df.date.astype("datetime64").dt.tz_localize("Europe/Warsaw")
|
|
|
366
370
|
|
|
367
371
|
Single country for the whole training dataset can be passed with `country_code` parameter:
|
|
368
372
|
```python
|
|
369
|
-
from upgini import FeaturesEnricher
|
|
373
|
+
from upgini.features_enricher import FeaturesEnricher
|
|
374
|
+
from upgini.metadata import SearchKey
|
|
375
|
+
|
|
370
376
|
enricher = FeaturesEnricher(
|
|
371
377
|
search_keys={
|
|
372
378
|
"subscription_activation_date": SearchKey.DATE,
|
|
@@ -385,7 +391,8 @@ Create instance of the `FeaturesEnricher` class and call:
|
|
|
385
391
|
Let's try it out!
|
|
386
392
|
```python
|
|
387
393
|
import pandas as pd
|
|
388
|
-
from upgini import FeaturesEnricher
|
|
394
|
+
from upgini.features_enricher import FeaturesEnricher
|
|
395
|
+
from upgini.metadata import SearchKey
|
|
389
396
|
|
|
390
397
|
# load labeled training dataset to initiate search
|
|
391
398
|
train_df = pd.read_csv("customer_churn_prediction_train.csv")
|
|
@@ -476,7 +483,9 @@ We detect ML task under the hood based on label column values. Currently we supp
|
|
|
476
483
|
|
|
477
484
|
But for certain search datasets you can pass a parameter to `FeaturesEnricher` with the correct ML task type:
|
|
478
485
|
```python
|
|
479
|
-
from upgini import
|
|
486
|
+
from upgini.features_enricher import FeaturesEnricher
|
|
487
|
+
from upgini.metadata import SearchKey, ModelTaskType
|
|
488
|
+
|
|
480
489
|
enricher = FeaturesEnricher(
|
|
481
490
|
search_keys={"subscription_activation_date": SearchKey.DATE},
|
|
482
491
|
model_task_type=ModelTaskType.REGRESSION
|
|
@@ -489,7 +498,9 @@ enricher = FeaturesEnricher(
|
|
|
489
498
|
|
|
490
499
|
To initiate feature search you can pass cross-validation type parameter to `FeaturesEnricher` with time series specific CV type:
|
|
491
500
|
```python
|
|
492
|
-
from upgini.
|
|
501
|
+
from upgini.features_enricher import FeaturesEnricher
|
|
502
|
+
from upgini.metadata import SearchKey, CVType
|
|
503
|
+
|
|
493
504
|
enricher = FeaturesEnricher(
|
|
494
505
|
search_keys={"sales_date": SearchKey.DATE},
|
|
495
506
|
cv=CVType.time_series
|
|
@@ -623,7 +634,9 @@ But you can easily define new split by passing child of BaseCrossValidator to pa
|
|
|
623
634
|
|
|
624
635
|
Example with more tips-and-tricks:
|
|
625
636
|
```python
|
|
626
|
-
from upgini import FeaturesEnricher
|
|
637
|
+
from upgini.features_enricher import FeaturesEnricher
|
|
638
|
+
from upgini.metadata import SearchKey
|
|
639
|
+
|
|
627
640
|
enricher = FeaturesEnricher(search_keys={"registration_date": SearchKey.DATE})
|
|
628
641
|
|
|
629
642
|
# Fit with default setup for metrics calculation
|
|
@@ -796,7 +809,7 @@ You may publish ANY data which you consider as royalty / license free ([Open Dat
|
|
|
796
809
|
2. Copy *Upgini API key* from profile and upload your data from Upgini python library with this key:
|
|
797
810
|
```python
|
|
798
811
|
import pandas as pd
|
|
799
|
-
from upgini import SearchKey
|
|
812
|
+
from upgini.metadata import SearchKey
|
|
800
813
|
from upgini.ads import upload_user_ads
|
|
801
814
|
import os
|
|
802
815
|
os.environ["UPGINI_API_KEY"] = "your_long_string_api_key_goes_here"
|
|
@@ -841,4 +854,4 @@ Some convenient ways to start contributing are:
|
|
|
841
854
|
- [More perks for registered users](https://profile.upgini.com)
|
|
842
855
|
|
|
843
856
|
<sup>😔 Found a typo or a bug in a code snippet? Our bad! <a href="https://github.com/upgini/upgini/issues/new?assignees=&title=readme%2Fbug">
|
|
844
|
-
Please report it here
|
|
857
|
+
Please report it here</a></sup>
|
|
@@ -90,7 +90,7 @@
|
|
|
90
90
|
|Consumer Confidence index| 44 |22|-|Monthly|date, country|No
|
|
91
91
|
|World economic indicators|191 |41|-|Monthly|date, country|No
|
|
92
92
|
|Markets data|-|17|-|Monthly|date, datetime|No
|
|
93
|
-
|World mobile & fixed broadband network coverage and
|
|
93
|
+
|World mobile & fixed broadband network coverage and performance |167|-|3|Monthly|country, postal/ZIP code|No
|
|
94
94
|
|World demographic data |90|-|2|Annual|country, postal/ZIP code|No
|
|
95
95
|
|World house prices |44|-|3|Annual|country, postal/ZIP code|No
|
|
96
96
|
|Public social media profile data |104|-|-|Monthly|date, email/HEM, phone |Yes
|
|
@@ -103,7 +103,7 @@
|
|
|
103
103
|
|
|
104
104
|
## 💼 Tutorials
|
|
105
105
|
|
|
106
|
-
### [Search of relevant external features & Automated feature generation for Salary
|
|
106
|
+
### [Search of relevant external features & Automated feature generation for Salary prediction task (use as a template)](https://github.com/upgini/upgini/blob/main/notebooks/Upgini_Features_search%26generation.ipynb)
|
|
107
107
|
|
|
108
108
|
* The goal is to predict salary for data science job posting based on information about employer and job description.
|
|
109
109
|
* Following this guide, you'll learn how to **search & auto generate new relevant features with Upgini library**
|
|
@@ -217,7 +217,9 @@ We do dataset verification and cleaning under the hood, but still there are some
|
|
|
217
217
|
*Search keys* columns will be used to match records from all potential external data sources / features.
|
|
218
218
|
Define one or multiple columns as search keys with `FeaturesEnricher` class initialization.
|
|
219
219
|
```python
|
|
220
|
-
from upgini import FeaturesEnricher
|
|
220
|
+
from upgini.features_enricher import FeaturesEnricher
|
|
221
|
+
from upgini.metadata import SearchKey
|
|
222
|
+
|
|
221
223
|
enricher = FeaturesEnricher(
|
|
222
224
|
search_keys={
|
|
223
225
|
"subscription_activation_date": SearchKey.DATE,
|
|
@@ -303,7 +305,9 @@ enricher = FeaturesEnricher(
|
|
|
303
305
|
|
|
304
306
|
For the meaning types <tt>SearchKey.DATE</tt>/<tt>SearchKey.DATETIME</tt> with dtypes <tt>object</tt> or <tt>string</tt> you have to clarify date/datetime format by passing <tt>date_format</tt> parameter to `FeaturesEnricher`. For example:
|
|
305
307
|
```python
|
|
306
|
-
from upgini import FeaturesEnricher
|
|
308
|
+
from upgini.features_enricher import FeaturesEnricher
|
|
309
|
+
from upgini.metadata import SearchKey
|
|
310
|
+
|
|
307
311
|
enricher = FeaturesEnricher(
|
|
308
312
|
search_keys={
|
|
309
313
|
"subscription_activation_date": SearchKey.DATE,
|
|
@@ -324,7 +328,9 @@ df["date"] = df.date.astype("datetime64").dt.tz_localize("Europe/Warsaw")
|
|
|
324
328
|
|
|
325
329
|
Single country for the whole training dataset can be passed with `country_code` parameter:
|
|
326
330
|
```python
|
|
327
|
-
from upgini import FeaturesEnricher
|
|
331
|
+
from upgini.features_enricher import FeaturesEnricher
|
|
332
|
+
from upgini.metadata import SearchKey
|
|
333
|
+
|
|
328
334
|
enricher = FeaturesEnricher(
|
|
329
335
|
search_keys={
|
|
330
336
|
"subscription_activation_date": SearchKey.DATE,
|
|
@@ -343,7 +349,8 @@ Create instance of the `FeaturesEnricher` class and call:
|
|
|
343
349
|
Let's try it out!
|
|
344
350
|
```python
|
|
345
351
|
import pandas as pd
|
|
346
|
-
from upgini import FeaturesEnricher
|
|
352
|
+
from upgini.features_enricher import FeaturesEnricher
|
|
353
|
+
from upgini.metadata import SearchKey
|
|
347
354
|
|
|
348
355
|
# load labeled training dataset to initiate search
|
|
349
356
|
train_df = pd.read_csv("customer_churn_prediction_train.csv")
|
|
@@ -434,7 +441,9 @@ We detect ML task under the hood based on label column values. Currently we supp
|
|
|
434
441
|
|
|
435
442
|
But for certain search datasets you can pass a parameter to `FeaturesEnricher` with the correct ML task type:
|
|
436
443
|
```python
|
|
437
|
-
from upgini import
|
|
444
|
+
from upgini.features_enricher import FeaturesEnricher
|
|
445
|
+
from upgini.metadata import SearchKey, ModelTaskType
|
|
446
|
+
|
|
438
447
|
enricher = FeaturesEnricher(
|
|
439
448
|
search_keys={"subscription_activation_date": SearchKey.DATE},
|
|
440
449
|
model_task_type=ModelTaskType.REGRESSION
|
|
@@ -447,7 +456,9 @@ enricher = FeaturesEnricher(
|
|
|
447
456
|
|
|
448
457
|
To initiate feature search you can pass cross-validation type parameter to `FeaturesEnricher` with time series specific CV type:
|
|
449
458
|
```python
|
|
450
|
-
from upgini.
|
|
459
|
+
from upgini.features_enricher import FeaturesEnricher
|
|
460
|
+
from upgini.metadata import SearchKey, CVType
|
|
461
|
+
|
|
451
462
|
enricher = FeaturesEnricher(
|
|
452
463
|
search_keys={"sales_date": SearchKey.DATE},
|
|
453
464
|
cv=CVType.time_series
|
|
@@ -581,7 +592,9 @@ But you can easily define new split by passing child of BaseCrossValidator to pa
|
|
|
581
592
|
|
|
582
593
|
Example with more tips-and-tricks:
|
|
583
594
|
```python
|
|
584
|
-
from upgini import FeaturesEnricher
|
|
595
|
+
from upgini.features_enricher import FeaturesEnricher
|
|
596
|
+
from upgini.metadata import SearchKey
|
|
597
|
+
|
|
585
598
|
enricher = FeaturesEnricher(search_keys={"registration_date": SearchKey.DATE})
|
|
586
599
|
|
|
587
600
|
# Fit with default setup for metrics calculation
|
|
@@ -754,7 +767,7 @@ You may publish ANY data which you consider as royalty / license free ([Open Dat
|
|
|
754
767
|
2. Copy *Upgini API key* from profile and upload your data from Upgini python library with this key:
|
|
755
768
|
```python
|
|
756
769
|
import pandas as pd
|
|
757
|
-
from upgini import SearchKey
|
|
770
|
+
from upgini.metadata import SearchKey
|
|
758
771
|
from upgini.ads import upload_user_ads
|
|
759
772
|
import os
|
|
760
773
|
os.environ["UPGINI_API_KEY"] = "your_long_string_api_key_goes_here"
|
|
@@ -799,4 +812,4 @@ Some convenient ways to start contributing are:
|
|
|
799
812
|
- [More perks for registered users](https://profile.upgini.com)
|
|
800
813
|
|
|
801
814
|
<sup>😔 Found a typo or a bug in a code snippet? Our bad! <a href="https://github.com/upgini/upgini/issues/new?assignees=&title=readme%2Fbug">
|
|
802
|
-
Please report it here
|
|
815
|
+
Please report it here</a></sup>
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "upgini"
|
|
7
|
+
dynamic = ["version"]
|
|
8
|
+
description = "Intelligent data search & enrichment for Machine Learning"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.8,<3.12"
|
|
11
|
+
authors = [
|
|
12
|
+
{ name = "Upgini Developers", email = "madewithlove@upgini.com" },
|
|
13
|
+
]
|
|
14
|
+
keywords = [
|
|
15
|
+
"automl",
|
|
16
|
+
"data mining",
|
|
17
|
+
"data science",
|
|
18
|
+
"data search",
|
|
19
|
+
"machine learning",
|
|
20
|
+
]
|
|
21
|
+
classifiers = [
|
|
22
|
+
"Development Status :: 5 - Production/Stable",
|
|
23
|
+
"Intended Audience :: Customer Service",
|
|
24
|
+
"Intended Audience :: Developers",
|
|
25
|
+
"Intended Audience :: Financial and Insurance Industry",
|
|
26
|
+
"Intended Audience :: Information Technology",
|
|
27
|
+
"Intended Audience :: Science/Research",
|
|
28
|
+
"Intended Audience :: Telecommunications Industry",
|
|
29
|
+
"License :: OSI Approved :: BSD License",
|
|
30
|
+
"Operating System :: OS Independent",
|
|
31
|
+
"Programming Language :: Python :: 3.8",
|
|
32
|
+
"Programming Language :: Python :: 3.9",
|
|
33
|
+
"Programming Language :: Python :: 3.10",
|
|
34
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
35
|
+
"Topic :: Scientific/Engineering :: Information Analysis",
|
|
36
|
+
]
|
|
37
|
+
dependencies = [
|
|
38
|
+
"catboost>=1.0.3",
|
|
39
|
+
"fastparquet>=0.8.1",
|
|
40
|
+
"ipywidgets>=8.1.0",
|
|
41
|
+
"numpy>=1.19.0,<=1.26.4",
|
|
42
|
+
"pandas>=1.1.0,<3.0.0",
|
|
43
|
+
"pydantic>1.0.0,<3.0.0",
|
|
44
|
+
"pyjwt>=2.8.0",
|
|
45
|
+
"python-dateutil>=2.8.0",
|
|
46
|
+
"python-json-logger>=2.0.2",
|
|
47
|
+
"requests>=2.8.0",
|
|
48
|
+
"scikit-learn>=1.3.0",
|
|
49
|
+
"python-bidi==0.4.2",
|
|
50
|
+
"xhtml2pdf==0.2.11",
|
|
51
|
+
"jarowinkler>=2.0.0",
|
|
52
|
+
"levenshtein>=0.25.1",
|
|
53
|
+
]
|
|
54
|
+
|
|
55
|
+
[project.urls]
|
|
56
|
+
"Bug Reports" = "https://github.com/upgini/upgini/issues"
|
|
57
|
+
Homepage = "https://upgini.com/"
|
|
58
|
+
Source = "https://github.com/upgini/upgini"
|
|
59
|
+
|
|
60
|
+
[tool.hatch.version]
|
|
61
|
+
path = "src/upgini/__about__.py"
|
|
62
|
+
|
|
63
|
+
[tool.hatch.build.targets.sdist]
|
|
64
|
+
include = [
|
|
65
|
+
"src"
|
|
66
|
+
]
|
|
67
|
+
|
|
68
|
+
[tool.hatch.build.targets.wheel]
|
|
69
|
+
packages = [
|
|
70
|
+
"src/upgini"
|
|
71
|
+
]
|
|
72
|
+
|
|
73
|
+
[tool.hatch.build]
|
|
74
|
+
include = [
|
|
75
|
+
"/src/utils/Roboto-Regular.ttf",
|
|
76
|
+
]
|
|
77
|
+
|
|
78
|
+
[tool.hatch.envs.default]
|
|
79
|
+
type = "virtual"
|
|
80
|
+
python = "3.11"
|
|
81
|
+
|
|
82
|
+
[tool.hatch.envs.test.scripts]
|
|
83
|
+
cov = 'pytest --cov-report=term-missing --cov-config=pyproject.toml --cov=upgini --cov=tests'
|
|
84
|
+
format = "black {args}"
|
|
85
|
+
lint = "ruff check {args}"
|
|
86
|
+
test_all = 'pytest -s -vv tests'
|
|
87
|
+
|
|
88
|
+
[[tool.hatch.envs.test.matrix]]
|
|
89
|
+
python = ["3.8"]
|
|
90
|
+
pandas = ["1.1.0"]
|
|
91
|
+
|
|
92
|
+
[[tool.hatch.envs.test.matrix]]
|
|
93
|
+
python = ["3.8", "3.9", "3.10", "3.11"]
|
|
94
|
+
pandas = ["1.2.0", "1.3.0", "1.4.0", "1.5.0", "2.0.0"]
|
|
95
|
+
|
|
96
|
+
[[tool.hatch.envs.test.matrix]]
|
|
97
|
+
python = ["3.9", "3.10", "3.11"]
|
|
98
|
+
pandas = ["2.1.0", "2.2.0"]
|
|
99
|
+
|
|
100
|
+
# from versions: 0.1, 0.2, 0.3.0, 0.4.0, 0.4.1, 0.4.2, 0.4.3, 0.5.0, 0.6.0, 0.6.1, 0.7.0, 0.7.1, 0.7.2, 0.7.3, 0.8.0, 0.8.1, 0.9.0, 0.9.1, 0.10.0, 0.10.1, 0.11.0, 0.12.0, 0.13.0, 0.13.1, 0.14.0, 0.14.1, 0.15.0, 0.15.1, 0.15.2, 0.16.0, 0.16.1, 0.16.2, 0.17.0, 0.17.1, 0.18.0, 0.18.1, 0.19.0, 0.19.1, 0.19.2, 0.20.0, 0.20.1, 0.20.2, 0.20.3, 0.21.0, 0.21.1, 0.22.0, 0.23.0, 0.23.1, 0.23.2, 0.23.3, 0.23.4, 0.24.0, 0.24.1, 0.24.2, 0.25.0, 0.25.1, 0.25.2, 0.25.3, 1.0.0, 1.0.1, 1.0.2, 1.0.3, 1.0.4, 1.0.5, 1.1.0, 1.1.1, 1.1.2, 1.1.3, 1.1.4, 1.1.5, 1.2.0, 1.2.1, 1.2.2, 1.2.3, 1.2.4, 1.2.5, 1.3.0, 1.3.1, 1.3.2, 1.3.3, 1.3.4, 1.3.5, 1.4.0rc0, 1.4.0, 1.4.1, 1.4.2, 1.4.3, 1.4.4, 1.5.0rc0, 1.5.0, 1.5.1, 1.5.2, 1.5.3, 2.0.0rc0, 2.0.0rc1, 2.0.0, 2.0.1, 2.0.2, 2.0.3
|
|
101
|
+
|
|
102
|
+
[tool.hatch.envs.test]
|
|
103
|
+
dependencies = [
|
|
104
|
+
"coverage[toml]",
|
|
105
|
+
"pytest",
|
|
106
|
+
"pytest-cov",
|
|
107
|
+
# "pytest-timeout",
|
|
108
|
+
"requests-mock",
|
|
109
|
+
"pytest-datafiles",
|
|
110
|
+
"pytest-xdist",
|
|
111
|
+
"pandas~={matrix:pandas}",
|
|
112
|
+
]
|
|
113
|
+
|
|
114
|
+
[tool.black]
|
|
115
|
+
line-length = 120
|
|
116
|
+
|
|
117
|
+
[tool.isort]
|
|
118
|
+
profile = "black"
|
|
119
|
+
|
|
120
|
+
[tool.pytest.ini_options]
|
|
121
|
+
pythonpath = [
|
|
122
|
+
"./src"
|
|
123
|
+
]
|
|
124
|
+
addopts="-n 4"
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.2.31"
|
|
@@ -5,7 +5,7 @@ from typing import Dict, Optional
|
|
|
5
5
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
import pandas as pd
|
|
8
|
-
from pandas.api.types import is_string_dtype
|
|
8
|
+
from pandas.api.types import is_object_dtype, is_string_dtype
|
|
9
9
|
|
|
10
10
|
from upgini import SearchKey
|
|
11
11
|
from upgini.http import get_rest_client
|
|
@@ -34,7 +34,11 @@ def upload_user_ads(name: str, df: pd.DataFrame, search_keys: Dict[str, SearchKe
|
|
|
34
34
|
if df[column_name].notnull().sum() < min_valid_rows_count:
|
|
35
35
|
raise ValueError(bundle.get("ads_upload_to_many_empty_rows"))
|
|
36
36
|
meaning_type = search_keys[column_name].value
|
|
37
|
-
if
|
|
37
|
+
if (
|
|
38
|
+
meaning_type == FileColumnMeaningType.MSISDN
|
|
39
|
+
and not is_string_dtype(df[column_name])
|
|
40
|
+
and not is_object_dtype(df[column_name])
|
|
41
|
+
):
|
|
38
42
|
df[column_name] = df[column_name].values.astype(np.int64).astype("string") # type: ignore
|
|
39
43
|
else:
|
|
40
44
|
meaning_type = FileColumnMeaningType.FEATURE
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
from copy import deepcopy
|
|
2
|
+
from typing import Dict
|
|
3
|
+
|
|
4
|
+
from upgini.autofe.binary import (
|
|
5
|
+
Add,
|
|
6
|
+
Combine,
|
|
7
|
+
CombineThenFreq,
|
|
8
|
+
Distance,
|
|
9
|
+
Divide,
|
|
10
|
+
JaroWinklerSim1,
|
|
11
|
+
JaroWinklerSim2,
|
|
12
|
+
LevenshteinSim,
|
|
13
|
+
Max,
|
|
14
|
+
Min,
|
|
15
|
+
Multiply,
|
|
16
|
+
Sim,
|
|
17
|
+
Subtract,
|
|
18
|
+
)
|
|
19
|
+
from upgini.autofe.date import (
|
|
20
|
+
DateDiff,
|
|
21
|
+
DateDiffType2,
|
|
22
|
+
DateListDiff,
|
|
23
|
+
DateListDiffBounded,
|
|
24
|
+
DatePercentile,
|
|
25
|
+
DatePercentileMethod2,
|
|
26
|
+
)
|
|
27
|
+
from upgini.autofe.groupby import GroupByThenAgg, GroupByThenFreq, GroupByThenNUnique, GroupByThenRank
|
|
28
|
+
from upgini.autofe.operand import Operand
|
|
29
|
+
from upgini.autofe.unary import Abs, Embeddings, Floor, Freq, Log, Residual, Norm, Sigmoid, Sqrt, Square
|
|
30
|
+
from upgini.autofe.vector import Mean, Sum
|
|
31
|
+
|
|
32
|
+
# Registry of the built-in autofe operands, keyed by each instance's ``name``
# attribute. Instances are created once, at import time, in the order listed
# below; if two instances report the same ``name`` the later one replaces the
# earlier one in the mapping.
ALL_OPERANDS: Dict[str, Operand] = {
    operand.name: operand
    for operand in (
        Freq(),
        Mean(),
        Sum(),
        Abs(),
        Log(),
        Sqrt(),
        Square(),
        Sigmoid(),
        Floor(),
        Residual(),
        Min(),
        Max(),
        Add(),
        Subtract(),
        Multiply(),
        Divide(),
        # Group-by aggregations that only differ in name and aggregate function.
        *(
            GroupByThenAgg(name=agg_name, agg=agg_func)
            for agg_name, agg_func in (
                ("GroupByThenMin", "min"),
                ("GroupByThenMax", "max"),
                ("GroupByThenMean", "mean"),
                ("GroupByThenMedian", "median"),
            )
        ),
        # The std variant additionally pins its output type.
        GroupByThenAgg(name="GroupByThenStd", output_type="float", agg="std"),
        GroupByThenRank(),
        Combine(),
        CombineThenFreq(),
        GroupByThenNUnique(),
        GroupByThenFreq(),
        Sim(),
        DateDiff(),
        DateDiffType2(),
        # One DateListDiff per supported aggregation.
        *(DateListDiff(aggregation=agg_func) for agg_func in ("min", "max", "mean", "nunique")),
        # Bounded counts over consecutive [lower, upper) style ranges
        # (exact bound inclusivity is defined by DateListDiffBounded itself).
        *(
            DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=low, upper_bound=high)
            for low, high in ((0, 18), (18, 23), (23, 30), (30, 45), (45, 60))
        ),
        # Open-ended last range: no upper bound is passed.
        DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=60),
        DatePercentile(),
        DatePercentileMethod2(),
        Norm(),
        JaroWinklerSim1(),
        JaroWinklerSim2(),
        LevenshteinSim(),
        Distance(),
        Embeddings(),
    )
}
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def find_op(name):
    """Return an independent deep copy of the operand registered under ``name``.

    The lookup goes through ``ALL_OPERANDS``. A deep copy is returned so that
    callers can modify the result without touching the shared registry entry.
    If ``name`` is not registered, ``None`` is returned.
    """
    registered = ALL_OPERANDS.get(name)
    return deepcopy(registered)
|