upgini 1.1.278a2__tar.gz → 1.1.279__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini-1.1.279/.gitignore +156 -0
- {upgini-1.1.278a2/src/upgini.egg-info → upgini-1.1.279}/PKG-INFO +18 -20
- upgini-1.1.279/pyproject.toml +102 -0
- upgini-1.1.279/src/upgini/__about__.py +1 -0
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/ads_management/ads_manager.py +4 -2
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/autofe/all_operands.py +3 -2
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/autofe/binary.py +2 -1
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/autofe/date.py +2 -1
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/autofe/feature.py +1 -1
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/autofe/groupby.py +3 -1
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/autofe/operand.py +4 -3
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/autofe/unary.py +2 -1
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/autofe/vector.py +2 -0
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/dataset.py +6 -15
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/errors.py +1 -1
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/features_enricher.py +104 -217
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/http.py +11 -10
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/mdc/__init__.py +1 -3
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/mdc/context.py +4 -6
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/metadata.py +5 -10
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/metrics.py +102 -100
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/normalizer/phone_normalizer.py +1 -1
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/resource_bundle/__init__.py +5 -5
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/resource_bundle/strings.properties +0 -1
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/sampler/base.py +1 -4
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/sampler/random_under_sampler.py +2 -5
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/search_task.py +4 -4
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/spinner.py +1 -1
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/utils/__init__.py +1 -1
- upgini-1.1.279/src/upgini/utils/base_search_key_detector.py +25 -0
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/utils/blocked_time_series.py +4 -2
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/utils/country_utils.py +1 -1
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/utils/custom_loss_utils.py +3 -2
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/utils/cv_utils.py +2 -2
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/utils/datetime_utils.py +20 -15
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/utils/deduplicate_utils.py +1 -11
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/utils/email_utils.py +2 -7
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/utils/fallback_progress_bar.py +1 -1
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/utils/progress_bar.py +1 -1
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/utils/sklearn_ext.py +14 -13
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/utils/track_info.py +2 -2
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/version_validator.py +2 -2
- upgini-1.1.278a2/PKG-INFO +0 -844
- upgini-1.1.278a2/pyproject.toml +0 -10
- upgini-1.1.278a2/setup.cfg +0 -4
- upgini-1.1.278a2/setup.py +0 -104
- upgini-1.1.278a2/src/upgini/utils/base_search_key_detector.py +0 -27
- upgini-1.1.278a2/src/upgini.egg-info/SOURCES.txt +0 -82
- upgini-1.1.278a2/src/upgini.egg-info/dependency_links.txt +0 -1
- upgini-1.1.278a2/src/upgini.egg-info/requires.txt +0 -13
- upgini-1.1.278a2/src/upgini.egg-info/top_level.txt +0 -1
- upgini-1.1.278a2/tests/test_autofe_operands.py +0 -94
- upgini-1.1.278a2/tests/test_binary_dataset.py +0 -47
- upgini-1.1.278a2/tests/test_blocked_time_series.py +0 -80
- upgini-1.1.278a2/tests/test_categorical_dataset.py +0 -44
- upgini-1.1.278a2/tests/test_continuous_dataset.py +0 -47
- upgini-1.1.278a2/tests/test_country_utils.py +0 -51
- upgini-1.1.278a2/tests/test_custom_loss_utils.py +0 -50
- upgini-1.1.278a2/tests/test_datetime_utils.py +0 -213
- upgini-1.1.278a2/tests/test_email_utils.py +0 -97
- upgini-1.1.278a2/tests/test_etalon_validation.py +0 -786
- upgini-1.1.278a2/tests/test_features_enricher.py +0 -2670
- upgini-1.1.278a2/tests/test_metrics.py +0 -1365
- upgini-1.1.278a2/tests/test_phone_utils.py +0 -31
- upgini-1.1.278a2/tests/test_postal_code_utils.py +0 -31
- upgini-1.1.278a2/tests/test_target_utils.py +0 -194
- upgini-1.1.278a2/tests/test_widget.py +0 -432
- {upgini-1.1.278a2 → upgini-1.1.279}/LICENSE +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279}/README.md +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/__init__.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/ads.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/utils/format.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.1.278a2 → upgini-1.1.279}/src/upgini/utils/warning_counter.py +0 -0
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
pip-wheel-metadata/
|
|
24
|
+
share/python-wheels/
|
|
25
|
+
*.egg-info/
|
|
26
|
+
.installed.cfg
|
|
27
|
+
*.egg
|
|
28
|
+
MANIFEST
|
|
29
|
+
|
|
30
|
+
# PyInstaller
|
|
31
|
+
# Usually these files are written by a python script from a template
|
|
32
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
33
|
+
*.manifest
|
|
34
|
+
*.spec
|
|
35
|
+
|
|
36
|
+
# Installer logs
|
|
37
|
+
pip-log.txt
|
|
38
|
+
pip-delete-this-directory.txt
|
|
39
|
+
|
|
40
|
+
# Unit test / coverage reports
|
|
41
|
+
htmlcov/
|
|
42
|
+
.tox/
|
|
43
|
+
.nox/
|
|
44
|
+
.coverage
|
|
45
|
+
.coverage.*
|
|
46
|
+
.cache
|
|
47
|
+
nosetests.xml
|
|
48
|
+
coverage.xml
|
|
49
|
+
*.cover
|
|
50
|
+
*.py,cover
|
|
51
|
+
.hypothesis/
|
|
52
|
+
.pytest_cache/
|
|
53
|
+
|
|
54
|
+
# Translations
|
|
55
|
+
*.mo
|
|
56
|
+
*.pot
|
|
57
|
+
|
|
58
|
+
# Django stuff:
|
|
59
|
+
*.log
|
|
60
|
+
local_settings.py
|
|
61
|
+
db.sqlite3
|
|
62
|
+
db.sqlite3-journal
|
|
63
|
+
|
|
64
|
+
# Flask stuff:
|
|
65
|
+
instance/
|
|
66
|
+
.webassets-cache
|
|
67
|
+
|
|
68
|
+
# Scrapy stuff:
|
|
69
|
+
.scrapy
|
|
70
|
+
|
|
71
|
+
# Sphinx documentation
|
|
72
|
+
docs/_build/
|
|
73
|
+
|
|
74
|
+
# PyBuilder
|
|
75
|
+
target/
|
|
76
|
+
|
|
77
|
+
# Jupyter Notebook
|
|
78
|
+
.ipynb_checkpoints
|
|
79
|
+
|
|
80
|
+
# IPython
|
|
81
|
+
profile_default/
|
|
82
|
+
ipython_config.py
|
|
83
|
+
|
|
84
|
+
# pyenv
|
|
85
|
+
.python-version
|
|
86
|
+
|
|
87
|
+
# pipenv
|
|
88
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
89
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
90
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
91
|
+
# install all needed dependencies.
|
|
92
|
+
#Pipfile.lock
|
|
93
|
+
|
|
94
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
|
95
|
+
__pypackages__/
|
|
96
|
+
|
|
97
|
+
# Celery stuff
|
|
98
|
+
celerybeat-schedule
|
|
99
|
+
celerybeat.pid
|
|
100
|
+
|
|
101
|
+
# SageMath parsed files
|
|
102
|
+
*.sage.py
|
|
103
|
+
|
|
104
|
+
# Environments
|
|
105
|
+
.env
|
|
106
|
+
.venv
|
|
107
|
+
env/
|
|
108
|
+
env8/
|
|
109
|
+
env9/
|
|
110
|
+
env10/
|
|
111
|
+
.env10/
|
|
112
|
+
.env310/
|
|
113
|
+
env11/
|
|
114
|
+
venv/
|
|
115
|
+
ENV/
|
|
116
|
+
env.bak/
|
|
117
|
+
venv.bak/
|
|
118
|
+
|
|
119
|
+
# Spyder project settings
|
|
120
|
+
.spyderproject
|
|
121
|
+
.spyproject
|
|
122
|
+
|
|
123
|
+
# Rope project settings
|
|
124
|
+
.ropeproject
|
|
125
|
+
|
|
126
|
+
# mkdocs documentation
|
|
127
|
+
/site
|
|
128
|
+
|
|
129
|
+
# mypy
|
|
130
|
+
.mypy_cache/
|
|
131
|
+
.dmypy.json
|
|
132
|
+
dmypy.json
|
|
133
|
+
|
|
134
|
+
# Pyre type checker
|
|
135
|
+
.pyre/
|
|
136
|
+
|
|
137
|
+
# IDE
|
|
138
|
+
.vscode/
|
|
139
|
+
.idea/
|
|
140
|
+
|
|
141
|
+
# macOS
|
|
142
|
+
.DS_Store
|
|
143
|
+
|
|
144
|
+
# Other
|
|
145
|
+
.cache/
|
|
146
|
+
activate_venv.sh
|
|
147
|
+
test-results/
|
|
148
|
+
test_notebooks/
|
|
149
|
+
publish.sh
|
|
150
|
+
catboost_info/
|
|
151
|
+
build/
|
|
152
|
+
playgroung.ipynb
|
|
153
|
+
fingerprint.js
|
|
154
|
+
envVars.txt
|
|
155
|
+
.ruff_cache
|
|
156
|
+
.jupyter
|
|
@@ -1,14 +1,13 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.1.
|
|
3
|
+
Version: 1.1.279
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
|
-
Home-page: https://upgini.com/
|
|
6
|
-
Author: Upgini Developers
|
|
7
|
-
Author-email: madewithlove@upgini.com
|
|
8
|
-
License: BSD 3-Clause License
|
|
9
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
|
+
Project-URL: Homepage, https://upgini.com/
|
|
10
7
|
Project-URL: Source, https://github.com/upgini/upgini
|
|
11
|
-
|
|
8
|
+
Author-email: Upgini Developers <madewithlove@upgini.com>
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: automl,data mining,data science,data search,machine learning
|
|
12
11
|
Classifier: Development Status :: 5 - Production/Stable
|
|
13
12
|
Classifier: Intended Audience :: Customer Service
|
|
14
13
|
Classifier: Intended Audience :: Developers
|
|
@@ -23,22 +22,21 @@ Classifier: Programming Language :: Python :: 3.9
|
|
|
23
22
|
Classifier: Programming Language :: Python :: 3.10
|
|
24
23
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
25
24
|
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
26
|
-
Requires-Python:
|
|
27
|
-
Description-Content-Type: text/markdown
|
|
28
|
-
License-File: LICENSE
|
|
29
|
-
Requires-Dist: python-dateutil>=2.8.0
|
|
30
|
-
Requires-Dist: requests>=2.8.0
|
|
31
|
-
Requires-Dist: pandas<3.0.0,>=1.1.0
|
|
32
|
-
Requires-Dist: numpy>=1.19.0
|
|
33
|
-
Requires-Dist: scikit-learn>=1.3.0
|
|
34
|
-
Requires-Dist: pydantic<2.0.0,>=1.8.2
|
|
35
|
-
Requires-Dist: fastparquet>=0.8.1
|
|
36
|
-
Requires-Dist: python-json-logger>=2.0.2
|
|
25
|
+
Requires-Python: <3.11,>=3.8
|
|
37
26
|
Requires-Dist: catboost>=1.0.3
|
|
27
|
+
Requires-Dist: fastparquet>=0.8.1
|
|
28
|
+
Requires-Dist: ipywidgets>=8.1.0
|
|
38
29
|
Requires-Dist: lightgbm>=3.3.2
|
|
30
|
+
Requires-Dist: numpy>=1.19.0
|
|
31
|
+
Requires-Dist: pandas<3.0.0,>=1.1.0
|
|
32
|
+
Requires-Dist: pydantic<2.0.0,>=1.8.2
|
|
39
33
|
Requires-Dist: pyjwt>=2.8.0
|
|
34
|
+
Requires-Dist: python-dateutil>=2.8.0
|
|
35
|
+
Requires-Dist: python-json-logger>=2.0.2
|
|
36
|
+
Requires-Dist: requests>=2.8.0
|
|
37
|
+
Requires-Dist: scikit-learn>=1.3.0
|
|
40
38
|
Requires-Dist: xhtml2pdf==0.2.11
|
|
41
|
-
|
|
39
|
+
Description-Content-Type: text/markdown
|
|
42
40
|
|
|
43
41
|
|
|
44
42
|
<!-- <h2 align="center"> <a href="https://upgini.com/">Upgini</a> : low-code feature search and enrichment library for machine learning </h2> -->
|
|
@@ -841,4 +839,4 @@ Some convenient ways to start contributing are:
|
|
|
841
839
|
- [More perks for registered users](https://profile.upgini.com)
|
|
842
840
|
|
|
843
841
|
<sup>😔 Found mistype or a bug in code snippet? Our bad! <a href="https://github.com/upgini/upgini/issues/new?assignees=&title=readme%2Fbug">
|
|
844
|
-
Please report it here.</a></sup>
|
|
842
|
+
Please report it here.</a></sup>
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "upgini"
|
|
7
|
+
dynamic = ["version"]
|
|
8
|
+
description = "Intelligent data search & enrichment for Machine Learning"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.8,<3.11"
|
|
11
|
+
authors = [
|
|
12
|
+
{ name = "Upgini Developers", email = "madewithlove@upgini.com" },
|
|
13
|
+
]
|
|
14
|
+
keywords = [
|
|
15
|
+
"automl",
|
|
16
|
+
"data mining",
|
|
17
|
+
"data science",
|
|
18
|
+
"data search",
|
|
19
|
+
"machine learning",
|
|
20
|
+
]
|
|
21
|
+
classifiers = [
|
|
22
|
+
"Development Status :: 5 - Production/Stable",
|
|
23
|
+
"Intended Audience :: Customer Service",
|
|
24
|
+
"Intended Audience :: Developers",
|
|
25
|
+
"Intended Audience :: Financial and Insurance Industry",
|
|
26
|
+
"Intended Audience :: Information Technology",
|
|
27
|
+
"Intended Audience :: Science/Research",
|
|
28
|
+
"Intended Audience :: Telecommunications Industry",
|
|
29
|
+
"License :: OSI Approved :: BSD License",
|
|
30
|
+
"Operating System :: OS Independent",
|
|
31
|
+
"Programming Language :: Python :: 3.8",
|
|
32
|
+
"Programming Language :: Python :: 3.9",
|
|
33
|
+
"Programming Language :: Python :: 3.10",
|
|
34
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
35
|
+
"Topic :: Scientific/Engineering :: Information Analysis",
|
|
36
|
+
]
|
|
37
|
+
dependencies = [
|
|
38
|
+
"catboost>=1.0.3",
|
|
39
|
+
"fastparquet>=0.8.1",
|
|
40
|
+
"ipywidgets>=8.1.0",
|
|
41
|
+
"lightgbm>=3.3.2",
|
|
42
|
+
"numpy>=1.19.0",
|
|
43
|
+
"pandas>=1.1.0,<3.0.0",
|
|
44
|
+
"pydantic>=1.8.2,<2.0.0",
|
|
45
|
+
"pyjwt>=2.8.0",
|
|
46
|
+
"python-dateutil>=2.8.0",
|
|
47
|
+
"python-json-logger>=2.0.2",
|
|
48
|
+
"requests>=2.8.0",
|
|
49
|
+
"scikit-learn>=1.3.0",
|
|
50
|
+
"xhtml2pdf==0.2.11",
|
|
51
|
+
]
|
|
52
|
+
|
|
53
|
+
[project.urls]
|
|
54
|
+
"Bug Reports" = "https://github.com/upgini/upgini/issues"
|
|
55
|
+
Homepage = "https://upgini.com/"
|
|
56
|
+
Source = "https://github.com/upgini/upgini"
|
|
57
|
+
|
|
58
|
+
[tool.hatch.version]
|
|
59
|
+
path = "src/upgini/__about__.py"
|
|
60
|
+
|
|
61
|
+
[tool.hatch.build.targets.sdist]
|
|
62
|
+
include = [
|
|
63
|
+
"src"
|
|
64
|
+
]
|
|
65
|
+
|
|
66
|
+
[tool.hatch.build.targets.wheel]
|
|
67
|
+
packages = [
|
|
68
|
+
"src/upgini"
|
|
69
|
+
]
|
|
70
|
+
|
|
71
|
+
[tool.hatch.envs.default]
|
|
72
|
+
type = "virtual"
|
|
73
|
+
python = "3.10"
|
|
74
|
+
|
|
75
|
+
[tool.hatch.envs.test]
|
|
76
|
+
dependencies = [
|
|
77
|
+
"coverage[toml]",
|
|
78
|
+
"pytest",
|
|
79
|
+
"pytest-cov",
|
|
80
|
+
"requests-mock",
|
|
81
|
+
]
|
|
82
|
+
|
|
83
|
+
[tool.hatch.envs.test.scripts]
|
|
84
|
+
cov = 'pytest --cov-report=term-missing --cov-config=pyproject.toml --cov=upgini --cov=tests {args}'
|
|
85
|
+
format = "black {args}"
|
|
86
|
+
lint = "ruff check {args}"
|
|
87
|
+
|
|
88
|
+
test_binary = 'pytest -s -vv tests/test_binary_dataset.py'
|
|
89
|
+
|
|
90
|
+
[[tool.hatch.envs.test.matrix]]
|
|
91
|
+
python = ["3.8", "3.9", "3.10"]
|
|
92
|
+
|
|
93
|
+
[tool.black]
|
|
94
|
+
line-length = 120
|
|
95
|
+
|
|
96
|
+
[tool.isort]
|
|
97
|
+
profile = "black"
|
|
98
|
+
|
|
99
|
+
[tool.pytest.ini_options]
|
|
100
|
+
pythonpath = [
|
|
101
|
+
"./src"
|
|
102
|
+
]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.1.279"
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
from typing import Dict
|
|
2
|
+
|
|
3
|
+
from upgini.autofe.binary import Add, Divide, Max, Min, Multiply, Sim, Subtract
|
|
2
4
|
from upgini.autofe.date import DateDiff, DateDiffType2, DateListDiff, DateListDiffBounded
|
|
3
5
|
from upgini.autofe.groupby import GroupByThenAgg, GroupByThenRank
|
|
4
6
|
from upgini.autofe.operand import Operand
|
|
5
|
-
from upgini.autofe.unary import Abs,
|
|
6
|
-
from upgini.autofe.binary import Min, Max, Add, Subtract, Multiply, Divide, Sim
|
|
7
|
+
from upgini.autofe.unary import Abs, Floor, Freq, Log, Residual, Sigmoid, Sqrt, Square
|
|
7
8
|
from upgini.autofe.vector import Mean, Sum
|
|
8
9
|
|
|
9
10
|
ALL_OPERANDS: Dict[str, Operand] = {
|
|
@@ -1,9 +1,10 @@
|
|
|
1
|
-
from upgini.autofe.operand import PandasOperand, VectorizableMixin
|
|
2
1
|
import numpy as np
|
|
3
2
|
import pandas as pd
|
|
4
3
|
from numpy import dot
|
|
5
4
|
from numpy.linalg import norm
|
|
6
5
|
|
|
6
|
+
from upgini.autofe.operand import PandasOperand, VectorizableMixin
|
|
7
|
+
|
|
7
8
|
|
|
8
9
|
class Min(PandasOperand):
|
|
9
10
|
name = "min"
|
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
from typing import Any, Optional, Union
|
|
2
|
+
|
|
2
3
|
import numpy as np
|
|
3
4
|
import pandas as pd
|
|
4
|
-
from pydantic import BaseModel
|
|
5
5
|
from pandas.core.arrays.timedeltas import TimedeltaArray
|
|
6
|
+
from pydantic import BaseModel
|
|
6
7
|
|
|
7
8
|
from upgini.autofe.operand import PandasOperand
|
|
8
9
|
|
|
@@ -1,7 +1,9 @@
|
|
|
1
|
-
from upgini.autofe.operand import PandasOperand, VectorizableMixin
|
|
2
1
|
from typing import Optional
|
|
2
|
+
|
|
3
3
|
import pandas as pd
|
|
4
4
|
|
|
5
|
+
from upgini.autofe.operand import PandasOperand, VectorizableMixin
|
|
6
|
+
|
|
5
7
|
|
|
6
8
|
class GroupByThenAgg(PandasOperand, VectorizableMixin):
|
|
7
9
|
agg: Optional[str]
|
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
from pydantic import BaseModel
|
|
2
|
-
from typing import Dict, List, Optional, Tuple, Union
|
|
3
1
|
import abc
|
|
4
|
-
import
|
|
2
|
+
from typing import Dict, List, Optional, Tuple, Union
|
|
3
|
+
|
|
5
4
|
import numpy as np
|
|
5
|
+
import pandas as pd
|
|
6
|
+
from pydantic import BaseModel
|
|
6
7
|
|
|
7
8
|
|
|
8
9
|
class Operand(BaseModel):
|
|
@@ -15,17 +15,15 @@ from pandas.api.types import (
|
|
|
15
15
|
is_float_dtype,
|
|
16
16
|
is_integer_dtype,
|
|
17
17
|
is_numeric_dtype,
|
|
18
|
+
is_object_dtype,
|
|
18
19
|
is_period_dtype,
|
|
19
20
|
is_string_dtype,
|
|
20
|
-
is_object_dtype,
|
|
21
21
|
)
|
|
22
22
|
|
|
23
23
|
from upgini.errors import ValidationError
|
|
24
24
|
from upgini.http import ProgressStage, SearchProgress, _RestClient
|
|
25
25
|
from upgini.metadata import (
|
|
26
|
-
ENTITY_SYSTEM_RECORD_ID,
|
|
27
26
|
EVAL_SET_INDEX,
|
|
28
|
-
SEARCH_KEY_UNNEST,
|
|
29
27
|
SYSTEM_COLUMNS,
|
|
30
28
|
SYSTEM_RECORD_ID,
|
|
31
29
|
TARGET,
|
|
@@ -81,7 +79,6 @@ class Dataset: # (pd.DataFrame):
|
|
|
81
79
|
path: Optional[str] = None,
|
|
82
80
|
meaning_types: Optional[Dict[str, FileColumnMeaningType]] = None,
|
|
83
81
|
search_keys: Optional[List[Tuple[str, ...]]] = None,
|
|
84
|
-
unnest_search_keys: Optional[Dict[str, str]] = None,
|
|
85
82
|
model_task_type: Optional[ModelTaskType] = None,
|
|
86
83
|
random_state: Optional[int] = None,
|
|
87
84
|
rest_client: Optional[_RestClient] = None,
|
|
@@ -98,7 +95,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
98
95
|
data = pd.read_csv(path, **kwargs)
|
|
99
96
|
else:
|
|
100
97
|
# try different separators: , ; \t ...
|
|
101
|
-
with open(path
|
|
98
|
+
with open(path) as csvfile:
|
|
102
99
|
sep = csv.Sniffer().sniff(csvfile.read(2048)).delimiter
|
|
103
100
|
kwargs["sep"] = sep
|
|
104
101
|
data = pd.read_csv(path, **kwargs)
|
|
@@ -116,7 +113,6 @@ class Dataset: # (pd.DataFrame):
|
|
|
116
113
|
self.description = description
|
|
117
114
|
self.meaning_types = meaning_types
|
|
118
115
|
self.search_keys = search_keys
|
|
119
|
-
self.unnest_search_keys = unnest_search_keys
|
|
120
116
|
self.ignore_columns = []
|
|
121
117
|
self.hierarchical_group_keys = []
|
|
122
118
|
self.hierarchical_subgroup_keys = []
|
|
@@ -176,7 +172,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
176
172
|
new_columns = []
|
|
177
173
|
dup_counter = 0
|
|
178
174
|
for column in self.data.columns:
|
|
179
|
-
if column in [TARGET, EVAL_SET_INDEX, SYSTEM_RECORD_ID
|
|
175
|
+
if column in [TARGET, EVAL_SET_INDEX, SYSTEM_RECORD_ID]:
|
|
180
176
|
self.columns_renaming[column] = column
|
|
181
177
|
new_columns.append(column)
|
|
182
178
|
continue
|
|
@@ -255,7 +251,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
255
251
|
@staticmethod
|
|
256
252
|
def _ip_to_int(ip: Optional[_BaseAddress]) -> Optional[int]:
|
|
257
253
|
try:
|
|
258
|
-
if isinstance(ip, IPv4Address
|
|
254
|
+
if isinstance(ip, (IPv4Address, IPv6Address)):
|
|
259
255
|
return int(ip)
|
|
260
256
|
except Exception:
|
|
261
257
|
pass
|
|
@@ -263,7 +259,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
263
259
|
@staticmethod
|
|
264
260
|
def _ip_to_int_str(ip: Optional[_BaseAddress]) -> Optional[str]:
|
|
265
261
|
try:
|
|
266
|
-
if isinstance(ip, IPv4Address
|
|
262
|
+
if isinstance(ip, (IPv4Address, IPv6Address)):
|
|
267
263
|
return str(int(ip))
|
|
268
264
|
except Exception:
|
|
269
265
|
pass
|
|
@@ -357,9 +353,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
357
353
|
|
|
358
354
|
if is_string_dtype(self.data[postal_code]) or is_object_dtype(self.data[postal_code]):
|
|
359
355
|
try:
|
|
360
|
-
self.data[postal_code] = (
|
|
361
|
-
self.data[postal_code].astype("string").astype("Float64").astype("Int64").astype("string")
|
|
362
|
-
)
|
|
356
|
+
self.data[postal_code] = self.data[postal_code].astype("float64").astype("Int64").astype("string")
|
|
363
357
|
except Exception:
|
|
364
358
|
pass
|
|
365
359
|
elif is_float_dtype(self.data[postal_code]):
|
|
@@ -809,9 +803,6 @@ class Dataset: # (pd.DataFrame):
|
|
|
809
803
|
meaningType=meaning_type,
|
|
810
804
|
minMaxValues=min_max_values,
|
|
811
805
|
)
|
|
812
|
-
if self.unnest_search_keys and column_meta.originalName in self.unnest_search_keys:
|
|
813
|
-
column_meta.isUnnest = True
|
|
814
|
-
column_meta.unnestKeyNames = self.unnest_search_keys[column_meta.originalName]
|
|
815
806
|
|
|
816
807
|
columns.append(column_meta)
|
|
817
808
|
|
|
@@ -16,7 +16,7 @@ class UnauthorizedError(HttpError):
|
|
|
16
16
|
"""Unauthorized error from REST API."""
|
|
17
17
|
|
|
18
18
|
def __init__(self, message, status_code):
|
|
19
|
-
message = "Unauthorized, please check your authorization token ({})"
|
|
19
|
+
message = f"Unauthorized, please check your authorization token ({message})"
|
|
20
20
|
super(UnauthorizedError, self).__init__(message, status_code)
|
|
21
21
|
|
|
22
22
|
|