upgini 1.1.244a25__py3-none-any.whl → 1.1.245a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/dataset.py +59 -53
- upgini/features_enricher.py +198 -185
- upgini/metrics.py +1 -0
- upgini/resource_bundle/__init__.py +14 -1
- upgini/utils/target_utils.py +8 -2
- {upgini-1.1.244a25.dist-info → upgini-1.1.245a1.dist-info}/METADATA +7 -7
- {upgini-1.1.244a25.dist-info → upgini-1.1.245a1.dist-info}/RECORD +10 -10
- {upgini-1.1.244a25.dist-info → upgini-1.1.245a1.dist-info}/WHEEL +1 -1
- {upgini-1.1.244a25.dist-info → upgini-1.1.245a1.dist-info}/LICENSE +0 -0
- {upgini-1.1.244a25.dist-info → upgini-1.1.245a1.dist-info}/top_level.txt +0 -0
upgini/resource_bundle/__init__.py
CHANGED
|
@@ -8,7 +8,7 @@ import os
|
|
|
8
8
|
import re
|
|
9
9
|
from os import PathLike
|
|
10
10
|
from pathlib import Path
|
|
11
|
-
from typing import KeysView, Sequence
|
|
11
|
+
from typing import KeysView, Optional, Sequence
|
|
12
12
|
|
|
13
13
|
from .exceptions import MalformedResourceBundleError, NotInResourceBundleError
|
|
14
14
|
|
|
@@ -221,3 +221,16 @@ def get_bundle(bundle_name: str, locale: str | Sequence[str | str] = None, path:
|
|
|
221
221
|
|
|
222
222
|
|
|
223
223
|
bundle = ResourceBundle("strings", None, path=os.path.dirname(os.path.realpath(__file__)))
|
|
224
|
+
custom_bundles = dict()
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def get_custom_bundle(custom_cfg: Optional[str] = None) -> "ResourceBundle":
|
|
228
|
+
global custom_bundles
|
|
229
|
+
if custom_cfg is not None:
|
|
230
|
+
custom_bundle = custom_bundles.get(custom_cfg)
|
|
231
|
+
if custom_bundle is None:
|
|
232
|
+
custom_bundle = ResourceBundle("strings", custom_cfg, path=os.path.dirname(os.path.realpath(__file__)))
|
|
233
|
+
custom_bundles[custom_cfg] = custom_bundle
|
|
234
|
+
return custom_bundle
|
|
235
|
+
else:
|
|
236
|
+
return bundle
|
upgini/utils/target_utils.py
CHANGED
|
@@ -7,7 +7,7 @@ from pandas.api.types import is_numeric_dtype
|
|
|
7
7
|
|
|
8
8
|
from upgini.errors import ValidationError
|
|
9
9
|
from upgini.metadata import ModelTaskType
|
|
10
|
-
from upgini.resource_bundle import
|
|
10
|
+
from upgini.resource_bundle import ResourceBundle, get_custom_bundle
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
def correct_string_target(y: Union[pd.Series, np.ndarray]) -> Union[pd.Series, np.ndarray]:
|
|
@@ -18,8 +18,13 @@ def correct_string_target(y: Union[pd.Series, np.ndarray]) -> Union[pd.Series, n
|
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
def define_task(
|
|
21
|
-
y: pd.Series,
|
|
21
|
+
y: pd.Series,
|
|
22
|
+
has_date: bool = False,
|
|
23
|
+
logger: Optional[logging.Logger] = None,
|
|
24
|
+
silent: bool = False,
|
|
25
|
+
bundle: Optional[ResourceBundle] = None,
|
|
22
26
|
) -> ModelTaskType:
|
|
27
|
+
bundle = bundle or get_custom_bundle()
|
|
23
28
|
if logger is None:
|
|
24
29
|
logger = logging.getLogger()
|
|
25
30
|
target = y.dropna()
|
|
@@ -61,6 +66,7 @@ def define_task(
|
|
|
61
66
|
task = ModelTaskType.REGRESSION
|
|
62
67
|
else:
|
|
63
68
|
task = ModelTaskType.MULTICLASS
|
|
69
|
+
|
|
64
70
|
logger.info(f"Detected task type: {task}")
|
|
65
71
|
if not silent:
|
|
66
72
|
print(bundle.get("target_type_detected").format(task))
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.1.244a25
|
|
3
|
+
Version: 1.1.245a1
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Home-page: https://upgini.com/
|
|
6
6
|
Author: Upgini Developers
|
|
@@ -45,8 +45,8 @@ Requires-Dist: ipywidgets (>=8.1.0)
|
|
|
45
45
|
<!-- <h2 align="center"> <a href="https://upgini.com/">Upgini</a> : low-code feature search and enrichment library for machine learning </h2> -->
|
|
46
46
|
<!-- <h2 align="center"> <a href="https://upgini.com/">Upgini</a> : Free automated data enrichment library for machine learning: </br>only the accuracy improving features in 2 minutes </h2> -->
|
|
47
47
|
<!-- <h2 align="center"> <a href="https://upgini.com/">Upgini</a> • Free production-ready automated data enrichment library for machine learning</h2>-->
|
|
48
|
-
<h2 align="center"> <a href="https://upgini.com/">Upgini • Intelligent data search & enrichment for Machine Learning</a></h2>
|
|
49
|
-
<p align="center"> <b>Easily find and add relevant features to your ML pipeline from</br> hundreds of public, community and premium external data sources, </br>
|
|
48
|
+
<h2 align="center"> <a href="https://upgini.com/">Upgini • Intelligent data search & enrichment for Machine Learning and AI</a></h2>
|
|
49
|
+
<p align="center"> <b>Easily find and add relevant features to your ML & AI pipeline from</br> hundreds of public, community and premium external data sources, </br>including open & commercial LLMs</b> </p>
|
|
50
50
|
<p align="center">
|
|
51
51
|
<br />
|
|
52
52
|
<a href="https://colab.research.google.com/github/upgini/upgini/blob/main/notebooks/Upgini_Features_search%26generation.ipynb"><strong>Quick Start in Colab »</strong></a> |
|
|
@@ -70,7 +70,7 @@ Requires-Dist: ipywidgets (>=8.1.0)
|
|
|
70
70
|
[](https://gitter.im/upgini/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) -->
|
|
71
71
|
## ❔ Overview
|
|
72
72
|
|
|
73
|
-
**Upgini** is an intelligent data search engine with a Python library that helps you find and add relevant features to your ML pipeline from hundreds of public, community, and premium external data sources. Under the hood, Upgini automatically optimizes all connected data sources by [generating an optimal set of machine ML features
|
|
73
|
+
**Upgini** is an intelligent data search engine with a Python library that helps you find and add relevant features to your ML pipeline from hundreds of public, community, and premium external data sources. Under the hood, Upgini automatically optimizes all connected data sources by [generating an optimal set of machine ML features using large language models (LLMs), GraphNNs and recurrent neural networks (RNNs)](https://upgini.com/#optimized_external_data).
|
|
74
74
|
|
|
75
75
|
**Motivation:** for most supervised ML models external data & features boost accuracy significantly better than any hyperparameters tuning. But lack of automated and time-efficient enrichment tools for external data blocks massive adoption of external features in ML pipelines. We want radically simplify features search and enrichment to make external data a standard approach. Like a hyperparameter tuning for machine learning nowadays.
|
|
76
76
|
|
|
@@ -78,9 +78,9 @@ Requires-Dist: ipywidgets (>=8.1.0)
|
|
|
78
78
|
|
|
79
79
|
## 🚀 Awesome features
|
|
80
80
|
⭐️ Automatically find only relevant features that *give accuracy improvement for ML model*. Not just correlated with target variable, what 9 out of 10 cases gives zero accuracy improvement
|
|
81
|
-
⭐️
|
|
82
|
-
⭐️
|
|
83
|
-
⭐️ Calculate
|
|
81
|
+
⭐️ Automated feature generation from the sources: feature generation with Large Language Models' data augmentation, RNNs, GraphNN; multiple data source ensembling
|
|
82
|
+
⭐️ Automatic search key augmentation from all connected sources. If you do not have all search keys in your search request, such as postal/zip code, Upgini will try to add those keys based on the provided set of search keys. This will broaden the search across all available data sources
|
|
83
|
+
⭐️ Calculate accuracy metrics and uplifts after enrichment existing ML model with external features
|
|
84
84
|
⭐️ Check the stability of accuracy gain from external data on out-of-time intervals and verification datasets. Mitigate risks of unstable external data dependencies in ML pipeline
|
|
85
85
|
⭐️ Easy to use - single request to enrich training dataset with [*all of the keys at once*](#-search-key-types-we-support-more-to-come):
|
|
86
86
|
<table>
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
upgini/__init__.py,sha256=asENHgEVHQBIkV-e_0IhE_ZWqkCG6398U3ZLrNzAH6k,407
|
|
2
2
|
upgini/ads.py,sha256=mre6xn44wcC_fg63iLT_kTh4mViZqR9AKRJZAtpQz8Y,2592
|
|
3
|
-
upgini/dataset.py,sha256=
|
|
3
|
+
upgini/dataset.py,sha256=AsDJmEfVvdnBrIXQ2DSjitnTQ-5uu1H59YkhjhBRXcw,50424
|
|
4
4
|
upgini/errors.py,sha256=pdzQl3MKuK52yvncxMWMRWeSIOGhUFzpQoszoRFBOk0,958
|
|
5
|
-
upgini/features_enricher.py,sha256=
|
|
5
|
+
upgini/features_enricher.py,sha256=DpSKMU9cyDWAn494MF_7WYl4uTWh3_g410n205WYh58,167803
|
|
6
6
|
upgini/fingerprint.js,sha256=VygVIQlN1v4NGZfjHqtRogOw8zjTnnMNJg_f7M5iGQU,33442
|
|
7
7
|
upgini/http.py,sha256=eSG4gOpmCGlXmB6KIPNzAG8tRZNUjyYpMeUeHw_2li4,42264
|
|
8
8
|
upgini/metadata.py,sha256=55t0uQI910tzTcnwxZCUL1413BhTiSm8oqiwp-94NyA,9613
|
|
9
|
-
upgini/metrics.py,sha256=
|
|
9
|
+
upgini/metrics.py,sha256=LS2MgEKgmn9VEXsKzxv3pBZ-q71mTnpWu6vL8fYgpo4,26727
|
|
10
10
|
upgini/search_task.py,sha256=5n4qGJmtu48s0-FHAtF3L5qVLMd1JVW3FJlM8dFbh-s,17063
|
|
11
11
|
upgini/spinner.py,sha256=Dm1dQ5F_z_Ua2odLxZX7OypcOX9tSx_vE5MGaKtUmfw,1118
|
|
12
12
|
upgini/version_validator.py,sha256=rDIncP6BEko4J2F2hUcMOtKm_vZbI4ICWcNcw8hrwM4,1400
|
|
@@ -26,7 +26,7 @@ upgini/mdc/__init__.py,sha256=ETDh3JKbrDdPMOECiYLAa8lvKYe68mv4IY6fZa9FimA,1126
|
|
|
26
26
|
upgini/mdc/context.py,sha256=Sl1S_InKlzzRxYqwJ2k24lawJdCKWgGJ-RIRfvzWJrk,1468
|
|
27
27
|
upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
28
28
|
upgini/normalizer/phone_normalizer.py,sha256=lhwsPEnfyjeIsndW2EcQGZksXYsfxaQ1ghAzVYoDRKM,9927
|
|
29
|
-
upgini/resource_bundle/__init__.py,sha256=
|
|
29
|
+
upgini/resource_bundle/__init__.py,sha256=hdvbqL0b0xMWbY6-kiYGsW1ro2GMiWpxxsO9uCv-h9Q,8379
|
|
30
30
|
upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
|
|
31
31
|
upgini/resource_bundle/strings.properties,sha256=0qDpfZBMU4moFXKzb11ALwk5N8FdgyJYzI0wRTvFJbI,25008
|
|
32
32
|
upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -51,11 +51,11 @@ upgini/utils/phone_utils.py,sha256=JNSkF8G6mgsN8Czy11pamaJdsY6rBINEMpi7jbVt_RA,4
|
|
|
51
51
|
upgini/utils/postal_code_utils.py,sha256=_8CR9tBqsPptQsmMUvnrCAmBaMIQSWH3JfJ4ly3x_zs,409
|
|
52
52
|
upgini/utils/progress_bar.py,sha256=iNXyqT3vKCeHpfiG5HHwr7Lk2cTtKViM93Fl8iZnjGc,1564
|
|
53
53
|
upgini/utils/sklearn_ext.py,sha256=fvuTWJ5AnT3ED9KSaQu_yIgW2JR19hFlaGDoVP3k60g,44027
|
|
54
|
-
upgini/utils/target_utils.py,sha256=
|
|
54
|
+
upgini/utils/target_utils.py,sha256=tZoSdrn74xzFOX-AO3KxEd_6utCNGp6DUciWhjlsFV0,2691
|
|
55
55
|
upgini/utils/track_info.py,sha256=EPcJ13Jqa17_T0JjM37Ac9kWDz5Zk0GVsIZKutOb8aU,5207
|
|
56
56
|
upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
|
|
57
|
-
upgini-1.1.
|
|
58
|
-
upgini-1.1.
|
|
59
|
-
upgini-1.1.
|
|
60
|
-
upgini-1.1.
|
|
61
|
-
upgini-1.1.
|
|
57
|
+
upgini-1.1.245a1.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
58
|
+
upgini-1.1.245a1.dist-info/METADATA,sha256=0zG-bWLQ9yLlpGZrFgQCiPtQvVj_G7HbNJQpFfh4pj0,48234
|
|
59
|
+
upgini-1.1.245a1.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
|
|
60
|
+
upgini-1.1.245a1.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
|
|
61
|
+
upgini-1.1.245a1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|