upgini 1.1.244a24__py3-none-any.whl → 1.1.245a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/metrics.py CHANGED
@@ -77,6 +77,7 @@ CATBOOST_MULTICLASS_PARAMS = {
77
77
  "verbose": False,
78
78
  "random_state": DEFAULT_RANDOM_STATE,
79
79
  "allow_writing_files": False,
80
+ "auto_class_weights": "Balanced",
80
81
  }
81
82
 
82
83
  LIGHTGBM_PARAMS = {
@@ -8,7 +8,7 @@ import os
8
8
  import re
9
9
  from os import PathLike
10
10
  from pathlib import Path
11
- from typing import KeysView, Sequence
11
+ from typing import KeysView, Optional, Sequence
12
12
 
13
13
  from .exceptions import MalformedResourceBundleError, NotInResourceBundleError
14
14
 
@@ -221,3 +221,16 @@ def get_bundle(bundle_name: str, locale: str | Sequence[str | str] = None, path:
221
221
 
222
222
 
223
223
  bundle = ResourceBundle("strings", None, path=os.path.dirname(os.path.realpath(__file__)))
224
+ custom_bundles = dict()
225
+
226
+
227
+ def get_custom_bundle(custom_cfg: Optional[str] = None) -> "ResourceBundle":
228
+ global custom_bundles
229
+ if custom_cfg is not None:
230
+ custom_bundle = custom_bundles.get(custom_cfg)
231
+ if custom_bundle is None:
232
+ custom_bundle = ResourceBundle("strings", custom_cfg, path=os.path.dirname(os.path.realpath(__file__)))
233
+ custom_bundles[custom_cfg] = custom_bundle
234
+ return custom_bundle
235
+ else:
236
+ return bundle
@@ -7,7 +7,7 @@ from pandas.api.types import is_numeric_dtype
7
7
 
8
8
  from upgini.errors import ValidationError
9
9
  from upgini.metadata import ModelTaskType
10
- from upgini.resource_bundle import bundle
10
+ from upgini.resource_bundle import ResourceBundle, get_custom_bundle
11
11
 
12
12
 
13
13
  def correct_string_target(y: Union[pd.Series, np.ndarray]) -> Union[pd.Series, np.ndarray]:
@@ -18,8 +18,13 @@ def correct_string_target(y: Union[pd.Series, np.ndarray]) -> Union[pd.Series, n
18
18
 
19
19
 
20
20
  def define_task(
21
- y: pd.Series, has_date: bool = False, logger: Optional[logging.Logger] = None, silent: bool = False
21
+ y: pd.Series,
22
+ has_date: bool = False,
23
+ logger: Optional[logging.Logger] = None,
24
+ silent: bool = False,
25
+ bundle: Optional[ResourceBundle] = None,
22
26
  ) -> ModelTaskType:
27
+ bundle = bundle or get_custom_bundle()
23
28
  if logger is None:
24
29
  logger = logging.getLogger()
25
30
  target = y.dropna()
@@ -61,6 +66,7 @@ def define_task(
61
66
  task = ModelTaskType.REGRESSION
62
67
  else:
63
68
  task = ModelTaskType.MULTICLASS
69
+
64
70
  logger.info(f"Detected task type: {task}")
65
71
  if not silent:
66
72
  print(bundle.get("target_type_detected").format(task))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: upgini
3
- Version: 1.1.244a24
3
+ Version: 1.1.245a1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Home-page: https://upgini.com/
6
6
  Author: Upgini Developers
@@ -45,8 +45,8 @@ Requires-Dist: ipywidgets (>=8.1.0)
45
45
  <!-- <h2 align="center"> <a href="https://upgini.com/">Upgini</a> : low-code feature search and enrichment library for machine learning </h2> -->
46
46
  <!-- <h2 align="center"> <a href="https://upgini.com/">Upgini</a> : Free automated data enrichment library for machine learning: </br>only the accuracy improving features in 2 minutes </h2> -->
47
47
  <!-- <h2 align="center"> <a href="https://upgini.com/">Upgini</a> • Free production-ready automated data enrichment library for machine learning</h2>-->
48
- <h2 align="center"> <a href="https://upgini.com/">Upgini • Intelligent data search & enrichment for Machine Learning</a></h2>
49
- <p align="center"> <b>Easily find and add relevant features to your ML pipeline from</br> hundreds of public, community and premium external data sources, </br>optimized for ML models with LLMs and other neural networks</b> </p>
48
+ <h2 align="center"> <a href="https://upgini.com/">Upgini • Intelligent data search & enrichment for Machine Learning and AI</a></h2>
49
+ <p align="center"> <b>Easily find and add relevant features to your ML & AI pipeline from<br /> hundreds of public, community and premium external data sources, <br />including open & commercial LLMs</b> </p>
50
50
  <p align="center">
51
51
  <br />
52
52
  <a href="https://colab.research.google.com/github/upgini/upgini/blob/main/notebooks/Upgini_Features_search%26generation.ipynb"><strong>Quick Start in Colab »</strong></a> |
@@ -70,7 +70,7 @@ Requires-Dist: ipywidgets (>=8.1.0)
70
70
  [![Gitter Community](https://img.shields.io/badge/gitter-@upgini-teal.svg?logo=gitter)](https://gitter.im/upgini/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) -->
71
71
  ## ❔ Overview
72
72
 
73
- **Upgini** is an intelligent data search engine with a Python library that helps you find and add relevant features to your ML pipeline from hundreds of public, community, and premium external data sources. Under the hood, Upgini automatically optimizes all connected data sources by [generating an optimal set of machine ML features from the source data using large language models (LLMs), GraphNNs and recurrent neural networks (RNNs)](https://upgini.com/#optimized_external_data).
73
+ **Upgini** is an intelligent data search engine with a Python library that helps you find and add relevant features to your ML pipeline from hundreds of public, community, and premium external data sources. Under the hood, Upgini automatically optimizes all connected data sources by [generating an optimal set of ML features using large language models (LLMs), GraphNNs and recurrent neural networks (RNNs)](https://upgini.com/#optimized_external_data).
74
74
 
75
75
  **Motivation:** for most supervised ML models, external data & features boost accuracy significantly better than any hyperparameter tuning. But the lack of automated and time-efficient enrichment tools for external data blocks massive adoption of external features in ML pipelines. We want to radically simplify feature search and enrichment to make external data a standard approach, just as hyperparameter tuning is for machine learning nowadays.
76
76
 
@@ -78,9 +78,9 @@ Requires-Dist: ipywidgets (>=8.1.0)
78
78
 
79
79
  ## 🚀 Awesome features
80
80
  ⭐️ Automatically find only relevant features that *give accuracy improvement for an ML model*, not just features correlated with the target variable, which in 9 out of 10 cases give zero accuracy improvement
81
- ⭐️ Data source optimizations: automated feature generation with Large Language Models' data augmentation, RNNs, GraphNN; multiple data source ensembling
82
- ⭐️ *Automatic search key augmentation* from all connected sources. If you do not have all search keys in your search request, such as postal/zip code, Upgini will try to add those keys based on the provided set of search keys. This will broaden the search across all available data sources
83
- ⭐️ Calculate *accuracy metrics and uplifts* after enrichment existing ML model with external features
81
+ ⭐️ Automated feature generation from the sources: feature generation with Large Language Models' data augmentation, RNNs, GraphNN; multiple data source ensembling
82
+ ⭐️ Automatic search key augmentation from all connected sources. If you do not have all search keys in your search request, such as postal/zip code, Upgini will try to add those keys based on the provided set of search keys. This will broaden the search across all available data sources
83
+ ⭐️ Calculate accuracy metrics and uplifts after enriching an existing ML model with external features
84
84
  ⭐️ Check the stability of accuracy gain from external data on out-of-time intervals and verification datasets. Mitigate risks of unstable external data dependencies in ML pipeline
85
85
  ⭐️ Easy to use - single request to enrich training dataset with [*all of the keys at once*](#-search-key-types-we-support-more-to-come):
86
86
  <table>
@@ -1,12 +1,12 @@
1
1
  upgini/__init__.py,sha256=asENHgEVHQBIkV-e_0IhE_ZWqkCG6398U3ZLrNzAH6k,407
2
2
  upgini/ads.py,sha256=mre6xn44wcC_fg63iLT_kTh4mViZqR9AKRJZAtpQz8Y,2592
3
- upgini/dataset.py,sha256=_k0J-YcXt-wsihv9WRWUE6tb_h6bbwELauJnp_A1an4,49697
3
+ upgini/dataset.py,sha256=AsDJmEfVvdnBrIXQ2DSjitnTQ-5uu1H59YkhjhBRXcw,50424
4
4
  upgini/errors.py,sha256=pdzQl3MKuK52yvncxMWMRWeSIOGhUFzpQoszoRFBOk0,958
5
- upgini/features_enricher.py,sha256=Z9PaDRGEFJUYzxd0VIPYH1N7I1EiCGkQLu0mRP98lOI,166504
5
+ upgini/features_enricher.py,sha256=DpSKMU9cyDWAn494MF_7WYl4uTWh3_g410n205WYh58,167803
6
6
  upgini/fingerprint.js,sha256=VygVIQlN1v4NGZfjHqtRogOw8zjTnnMNJg_f7M5iGQU,33442
7
7
  upgini/http.py,sha256=eSG4gOpmCGlXmB6KIPNzAG8tRZNUjyYpMeUeHw_2li4,42264
8
8
  upgini/metadata.py,sha256=55t0uQI910tzTcnwxZCUL1413BhTiSm8oqiwp-94NyA,9613
9
- upgini/metrics.py,sha256=WL0-JSJXjl8jTdEzPDhXUeyd_QQtB09gNOltZDLuWTI,26689
9
+ upgini/metrics.py,sha256=LS2MgEKgmn9VEXsKzxv3pBZ-q71mTnpWu6vL8fYgpo4,26727
10
10
  upgini/search_task.py,sha256=5n4qGJmtu48s0-FHAtF3L5qVLMd1JVW3FJlM8dFbh-s,17063
11
11
  upgini/spinner.py,sha256=Dm1dQ5F_z_Ua2odLxZX7OypcOX9tSx_vE5MGaKtUmfw,1118
12
12
  upgini/version_validator.py,sha256=rDIncP6BEko4J2F2hUcMOtKm_vZbI4ICWcNcw8hrwM4,1400
@@ -26,7 +26,7 @@ upgini/mdc/__init__.py,sha256=ETDh3JKbrDdPMOECiYLAa8lvKYe68mv4IY6fZa9FimA,1126
26
26
  upgini/mdc/context.py,sha256=Sl1S_InKlzzRxYqwJ2k24lawJdCKWgGJ-RIRfvzWJrk,1468
27
27
  upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
28
  upgini/normalizer/phone_normalizer.py,sha256=lhwsPEnfyjeIsndW2EcQGZksXYsfxaQ1ghAzVYoDRKM,9927
29
- upgini/resource_bundle/__init__.py,sha256=M7GtS7KPQw9pinz8P2aQWXpSkD2YFwUPVGk1w92Pn84,7888
29
+ upgini/resource_bundle/__init__.py,sha256=hdvbqL0b0xMWbY6-kiYGsW1ro2GMiWpxxsO9uCv-h9Q,8379
30
30
  upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
31
31
  upgini/resource_bundle/strings.properties,sha256=0qDpfZBMU4moFXKzb11ALwk5N8FdgyJYzI0wRTvFJbI,25008
32
32
  upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -51,11 +51,11 @@ upgini/utils/phone_utils.py,sha256=JNSkF8G6mgsN8Czy11pamaJdsY6rBINEMpi7jbVt_RA,4
51
51
  upgini/utils/postal_code_utils.py,sha256=_8CR9tBqsPptQsmMUvnrCAmBaMIQSWH3JfJ4ly3x_zs,409
52
52
  upgini/utils/progress_bar.py,sha256=iNXyqT3vKCeHpfiG5HHwr7Lk2cTtKViM93Fl8iZnjGc,1564
53
53
  upgini/utils/sklearn_ext.py,sha256=fvuTWJ5AnT3ED9KSaQu_yIgW2JR19hFlaGDoVP3k60g,44027
54
- upgini/utils/target_utils.py,sha256=VgEGA4v5soWE9H5mQUPUEg8fnIXXWXDVHDBjq2oPDCk,2562
54
+ upgini/utils/target_utils.py,sha256=tZoSdrn74xzFOX-AO3KxEd_6utCNGp6DUciWhjlsFV0,2691
55
55
  upgini/utils/track_info.py,sha256=EPcJ13Jqa17_T0JjM37Ac9kWDz5Zk0GVsIZKutOb8aU,5207
56
56
  upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
57
- upgini-1.1.244a24.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
58
- upgini-1.1.244a24.dist-info/METADATA,sha256=w0-iq4DCF3wHO8QBf20W3EHMhNdjrKJRN9IakMZYAcs,48265
59
- upgini-1.1.244a24.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
60
- upgini-1.1.244a24.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
61
- upgini-1.1.244a24.dist-info/RECORD,,
57
+ upgini-1.1.245a1.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
58
+ upgini-1.1.245a1.dist-info/METADATA,sha256=0zG-bWLQ9yLlpGZrFgQCiPtQvVj_G7HbNJQpFfh4pj0,48234
59
+ upgini-1.1.245a1.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
60
+ upgini-1.1.245a1.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
61
+ upgini-1.1.245a1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.37.1)
2
+ Generator: bdist_wheel (0.40.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5