upgini 1.1.166a2__tar.gz → 1.1.168__tar.gz

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (68)
  1. {upgini-1.1.166a2/src/upgini.egg-info → upgini-1.1.168}/PKG-INFO +1 -1
  2. {upgini-1.1.166a2 → upgini-1.1.168}/setup.py +1 -1
  3. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/dataset.py +3 -3
  4. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/features_enricher.py +4 -3
  5. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/metrics.py +9 -2
  6. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/resource_bundle/strings.properties +1 -1
  7. {upgini-1.1.166a2 → upgini-1.1.168/src/upgini.egg-info}/PKG-INFO +1 -1
  8. {upgini-1.1.166a2 → upgini-1.1.168}/LICENSE +0 -0
  9. {upgini-1.1.166a2 → upgini-1.1.168}/README.md +0 -0
  10. {upgini-1.1.166a2 → upgini-1.1.168}/pyproject.toml +0 -0
  11. {upgini-1.1.166a2 → upgini-1.1.168}/setup.cfg +0 -0
  12. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/__init__.py +0 -0
  13. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/ads.py +0 -0
  14. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/ads_management/__init__.py +0 -0
  15. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/ads_management/ads_manager.py +0 -0
  16. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/data_source/__init__.py +0 -0
  17. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/data_source/data_source_publisher.py +0 -0
  18. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/errors.py +0 -0
  19. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/fingerprint.js +0 -0
  20. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/http.py +0 -0
  21. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/mdc/__init__.py +0 -0
  22. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/mdc/context.py +0 -0
  23. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/metadata.py +0 -0
  24. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/normalizer/__init__.py +0 -0
  25. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/normalizer/phone_normalizer.py +0 -0
  26. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/resource_bundle/__init__.py +0 -0
  27. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/resource_bundle/exceptions.py +0 -0
  28. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/sampler/__init__.py +0 -0
  29. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/sampler/base.py +0 -0
  30. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/sampler/random_under_sampler.py +0 -0
  31. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/sampler/utils.py +0 -0
  32. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/search_task.py +0 -0
  33. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/spinner.py +0 -0
  34. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/utils/__init__.py +0 -0
  35. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/utils/base_search_key_detector.py +0 -0
  36. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/utils/blocked_time_series.py +0 -0
  37. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/utils/country_utils.py +0 -0
  38. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/utils/custom_loss_utils.py +0 -0
  39. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/utils/cv_utils.py +0 -0
  40. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/utils/datetime_utils.py +0 -0
  41. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/utils/display_utils.py +0 -0
  42. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/utils/email_utils.py +0 -0
  43. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/utils/features_validator.py +0 -0
  44. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/utils/format.py +0 -0
  45. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/utils/ip_utils.py +0 -0
  46. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/utils/phone_utils.py +0 -0
  47. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/utils/postal_code_utils.py +0 -0
  48. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/utils/target_utils.py +0 -0
  49. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/utils/track_info.py +0 -0
  50. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/utils/warning_counter.py +0 -0
  51. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini/version_validator.py +0 -0
  52. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini.egg-info/SOURCES.txt +0 -0
  53. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini.egg-info/dependency_links.txt +0 -0
  54. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini.egg-info/requires.txt +0 -0
  55. {upgini-1.1.166a2 → upgini-1.1.168}/src/upgini.egg-info/top_level.txt +0 -0
  56. {upgini-1.1.166a2 → upgini-1.1.168}/tests/test_binary_dataset.py +0 -0
  57. {upgini-1.1.166a2 → upgini-1.1.168}/tests/test_blocked_time_series.py +0 -0
  58. {upgini-1.1.166a2 → upgini-1.1.168}/tests/test_categorical_dataset.py +0 -0
  59. {upgini-1.1.166a2 → upgini-1.1.168}/tests/test_continuous_dataset.py +0 -0
  60. {upgini-1.1.166a2 → upgini-1.1.168}/tests/test_country_utils.py +0 -0
  61. {upgini-1.1.166a2 → upgini-1.1.168}/tests/test_custom_loss_utils.py +0 -0
  62. {upgini-1.1.166a2 → upgini-1.1.168}/tests/test_datetime_utils.py +0 -0
  63. {upgini-1.1.166a2 → upgini-1.1.168}/tests/test_email_utils.py +0 -0
  64. {upgini-1.1.166a2 → upgini-1.1.168}/tests/test_etalon_validation.py +0 -0
  65. {upgini-1.1.166a2 → upgini-1.1.168}/tests/test_features_enricher.py +0 -0
  66. {upgini-1.1.166a2 → upgini-1.1.168}/tests/test_metrics.py +0 -0
  67. {upgini-1.1.166a2 → upgini-1.1.168}/tests/test_phone_utils.py +0 -0
  68. {upgini-1.1.166a2 → upgini-1.1.168}/tests/test_postal_code_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: upgini
3
- Version: 1.1.166a2
3
+ Version: 1.1.168
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Home-page: https://upgini.com/
6
6
  Author: Upgini Developers
@@ -35,7 +35,7 @@ def send_log(msg: str):
35
35
 
36
36
 
37
37
  here = Path(__file__).parent.resolve()
38
- version = "1.1.166a2"
38
+ version = "1.1.168"
39
39
  try:
40
40
  send_log(f"Start setup PyLib version {version}")
41
41
  setup(
@@ -45,11 +45,11 @@ from upgini.utils.warning_counter import WarningCounter
45
45
 
46
46
  class Dataset: # (pd.DataFrame):
47
47
  MIN_ROWS_COUNT = 100
48
- MAX_ROWS = 300_000
48
+ MAX_ROWS = 200_000
49
49
  FIT_SAMPLE_ROWS = 200_000
50
50
  FIT_SAMPLE_THRESHOLD = 200_000
51
- FIT_SAMPLE_WITH_EVAL_SET_ROWS = 300_000
52
- FIT_SAMPLE_WITH_EVAL_SET_THRESHOLD = 300_000
51
+ FIT_SAMPLE_WITH_EVAL_SET_ROWS = 200_000
52
+ FIT_SAMPLE_WITH_EVAL_SET_THRESHOLD = 200_000
53
53
  MIN_SAMPLE_THRESHOLD = 20_000
54
54
  IMBALANCE_THESHOLD = 0.4
55
55
  MIN_TARGET_CLASS_ROWS = 100
@@ -111,6 +111,7 @@ class FeaturesEnricher(TransformerMixin):
111
111
  RANDOM_STATE = 42
112
112
  CALCULATE_METRICS_THRESHOLD = 50_000_000
113
113
  CALCULATE_METRICS_MIN_THRESHOLD = 500
114
+ GENERATE_FEATURES_LIMIT = 10
114
115
  EMPTY_FEATURES_INFO = pd.DataFrame(
115
116
  columns=[
116
117
  bundle.get("features_info_provider"),
@@ -211,8 +212,8 @@ class FeaturesEnricher(TransformerMixin):
211
212
  self.generate_features = generate_features
212
213
  self.round_embeddings = round_embeddings
213
214
  if generate_features is not None:
214
- if len(generate_features) > 2:
215
- msg = bundle.get("too_many_generate_features")
215
+ if len(generate_features) > self.GENERATE_FEATURES_LIMIT:
216
+ msg = bundle.get("too_many_generate_features").format(self.GENERATE_FEATURES_LIMIT)
216
217
  self.logger.error(msg)
217
218
  raise ValidationError(msg)
218
219
  self.runtime_parameters.properties["generate_features"] = ",".join(generate_features)
@@ -1755,7 +1756,7 @@ class FeaturesEnricher(TransformerMixin):
1755
1756
  and len(self._search_task.unused_features_for_generation) > 0
1756
1757
  ):
1757
1758
  unused_features_for_generation = [
1758
- dataset.columns_renaming.get(col) for col in self._search_task.unused_features_for_generation
1759
+ dataset.columns_renaming.get(col) or col for col in self._search_task.unused_features_for_generation
1759
1760
  ]
1760
1761
  msg = bundle.get("features_not_generated").format(unused_features_for_generation)
1761
1762
  self.logger.warning(msg)
@@ -8,7 +8,15 @@ from catboost import CatBoostClassifier, CatBoostRegressor
8
8
  from lightgbm import LGBMClassifier, LGBMRegressor
9
9
  from numpy import log1p
10
10
  from pandas.api.types import is_numeric_dtype
11
- from sklearn.metrics import get_scorer_names, check_scoring, get_scorer, make_scorer
11
+ from sklearn.metrics import check_scoring, get_scorer, make_scorer
12
+
13
+ try:
14
+ from sklearn.metrics import get_scorer_names
15
+ available_scorers = get_scorer_names()
16
+ except ImportError:
17
+ from sklearn.metrics._scorer import SCORERS
18
+ available_scorers = SCORERS
19
+
12
20
  from sklearn.metrics._regression import (
13
21
  _check_reg_targets,
14
22
  check_consistent_length,
@@ -385,7 +393,6 @@ def _get_scorer(target_type: ModelTaskType, scoring: Union[Callable, str, None])
385
393
 
386
394
  multiplier = 1
387
395
  if isinstance(scoring, str):
388
- available_scorers = get_scorer_names()
389
396
  metric_name = scoring
390
397
  if "mean_squared_log_error" == metric_name or "MSLE" == metric_name or "msle" == metric_name:
391
398
  scoring = make_scorer(_ext_mean_squared_log_error, greater_is_better=False)
@@ -59,7 +59,7 @@ no_connection_to_upgini=No connection to Upgini server https://search.upgini.com
59
59
  no_internet_connection=No internet connection from Jupyter server {} to initiate external data search with Upgini service, please try with Google Colab https://colab.research.google.com
60
60
  access_denied=Access denied
61
61
  unsupported_search_key=Search key {} not supported
62
- too_many_generate_features=Too many columns passed in `generate_features` argument. Only two columns supported to generate features now
62
+ too_many_generate_features=Too many columns passed in `generate_features` argument. Only {} columns supported to generate features now
63
63
  invalid_round_embeddings=Argument `round_embeddings` should be non negative integer
64
64
  no_important_features_for_transform=There are no important features for transform. Return input as transformed
65
65
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: upgini
3
- Version: 1.1.166a2
3
+ Version: 1.1.168
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Home-page: https://upgini.com/
6
6
  Author: Upgini Developers
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes