upgini 1.2.9a3__tar.gz → 1.2.9a99__tar.gz
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- {upgini-1.2.9a3 → upgini-1.2.9a99}/PKG-INFO +2 -1
- {upgini-1.2.9a3 → upgini-1.2.9a99}/pyproject.toml +1 -1
- upgini-1.2.9a99/src/upgini/__about__.py +1 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/features_enricher.py +2 -2
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/metrics.py +3 -3
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/utils/features_validator.py +2 -3
- upgini-1.2.9a3/src/upgini/__about__.py +0 -1
- {upgini-1.2.9a3 → upgini-1.2.9a99}/.gitignore +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/LICENSE +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/README.md +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/__init__.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/ads.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/autofe/all_operands.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/autofe/date.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/autofe/operand.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/dataset.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/errors.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/http.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/lazy_import.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/mdc/context.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/metadata.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/normalizer/normalize_utils.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/sampler/base.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/search_task.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/spinner.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/utils/format.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.2.9a3 → upgini-1.2.9a99}/src/upgini/version_validator.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.2.9a3
|
|
3
|
+
Version: 1.2.9a99
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
|
@@ -28,6 +28,7 @@ Requires-Dist: fastparquet>=0.8.1
|
|
|
28
28
|
Requires-Dist: ipywidgets>=8.1.0
|
|
29
29
|
Requires-Dist: jarowinkler>=2.0.0
|
|
30
30
|
Requires-Dist: levenshtein>=0.25.1
|
|
31
|
+
Requires-Dist: lightgbm>=3.3.2
|
|
31
32
|
Requires-Dist: numpy<=1.26.4,>=1.19.0
|
|
32
33
|
Requires-Dist: pandas<3.0.0,>=1.1.0
|
|
33
34
|
Requires-Dist: pydantic<3.0.0,>1.0.0
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.2.9a99"
|
|
@@ -2531,7 +2531,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2531
2531
|
features_columns = [c for c in df.columns if c not in non_feature_columns]
|
|
2532
2532
|
|
|
2533
2533
|
features_to_drop = FeaturesValidator(self.logger).validate(
|
|
2534
|
-
df, features_columns, self.generate_features,
|
|
2534
|
+
df, features_columns, self.generate_features, self.warning_counter
|
|
2535
2535
|
)
|
|
2536
2536
|
self.fit_dropped_features.update(features_to_drop)
|
|
2537
2537
|
df = df.drop(columns=features_to_drop)
|
|
@@ -2657,7 +2657,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2657
2657
|
and len(self._search_task.unused_features_for_generation) > 0
|
|
2658
2658
|
):
|
|
2659
2659
|
unused_features_for_generation = [
|
|
2660
|
-
columns_renaming.get(col) or col for col in self._search_task.unused_features_for_generation
|
|
2660
|
+
dataset.columns_renaming.get(col) or col for col in self._search_task.unused_features_for_generation
|
|
2661
2661
|
]
|
|
2662
2662
|
msg = self.bundle.get("features_not_generated").format(unused_features_for_generation)
|
|
2663
2663
|
self.logger.warning(msg)
|
|
@@ -10,6 +10,7 @@ import catboost
|
|
|
10
10
|
import numpy as np
|
|
11
11
|
import pandas as pd
|
|
12
12
|
from catboost import CatBoostClassifier, CatBoostRegressor
|
|
13
|
+
from lightgbm import LGBMClassifier, LGBMRegressor
|
|
13
14
|
from numpy import log1p
|
|
14
15
|
from pandas.api.types import is_numeric_dtype
|
|
15
16
|
from sklearn.metrics import check_scoring, get_scorer, make_scorer, roc_auc_score
|
|
@@ -405,7 +406,6 @@ class EstimatorWrapper:
|
|
|
405
406
|
estimator = CatBoostWrapper(**kwargs)
|
|
406
407
|
else:
|
|
407
408
|
try:
|
|
408
|
-
from lightgbm import LGBMClassifier, LGBMRegressor
|
|
409
409
|
if isinstance(estimator, (LGBMClassifier, LGBMRegressor)):
|
|
410
410
|
estimator = LightGBMWrapper(**kwargs)
|
|
411
411
|
else:
|
|
@@ -486,7 +486,7 @@ class CatBoostWrapper(EstimatorWrapper):
|
|
|
486
486
|
|
|
487
487
|
# Find rest categorical features
|
|
488
488
|
self.cat_features = _get_cat_features(x, self.text_features, embedding_features)
|
|
489
|
-
|
|
489
|
+
x = fill_na_cat_features(x, self.cat_features) # Was commented
|
|
490
490
|
unique_cat_features = []
|
|
491
491
|
for name in self.cat_features:
|
|
492
492
|
# Remove constant categorical features
|
|
@@ -536,7 +536,7 @@ class CatBoostWrapper(EstimatorWrapper):
|
|
|
536
536
|
x, emb_columns = self.group_embeddings(x)
|
|
537
537
|
params["embedding_features"] = emb_columns
|
|
538
538
|
if self.cat_features:
|
|
539
|
-
|
|
539
|
+
x = fill_na_cat_features(x, self.cat_features) # Was commented
|
|
540
540
|
params["cat_features"] = self.cat_features
|
|
541
541
|
|
|
542
542
|
return x, y, params
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from logging import Logger
|
|
3
|
-
from typing import Dict, List, Optional
|
|
3
|
+
from typing import List, Optional
|
|
4
4
|
|
|
5
5
|
import pandas as pd
|
|
6
6
|
from pandas.api.types import is_integer_dtype, is_object_dtype, is_string_dtype
|
|
@@ -22,7 +22,6 @@ class FeaturesValidator:
|
|
|
22
22
|
df: pd.DataFrame,
|
|
23
23
|
features: List[str],
|
|
24
24
|
features_for_generate: Optional[List[str]],
|
|
25
|
-
columns_renaming: Dict[str, str],
|
|
26
25
|
warning_counter: WarningCounter,
|
|
27
26
|
) -> List[str]:
|
|
28
27
|
# one_hot_encoded_features = []
|
|
@@ -64,7 +63,7 @@ class FeaturesValidator:
|
|
|
64
63
|
|
|
65
64
|
high_cardinality_features = self.find_high_cardinality(df[features])
|
|
66
65
|
if features_for_generate:
|
|
67
|
-
high_cardinality_features = [f for f in high_cardinality_features if
|
|
66
|
+
high_cardinality_features = [f for f in high_cardinality_features if f not in features_for_generate]
|
|
68
67
|
if high_cardinality_features:
|
|
69
68
|
msg = bundle.get("high_cardinality_features").format(high_cardinality_features)
|
|
70
69
|
print(msg)
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.2.9a3"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|