upgini 1.2.48__tar.gz → 1.2.50__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- {upgini-1.2.48 → upgini-1.2.50}/PKG-INFO +1 -1
- upgini-1.2.50/src/upgini/__about__.py +1 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/dataset.py +10 -3
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/features_enricher.py +11 -5
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/resource_bundle/strings.properties +1 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/utils/feature_info.py +7 -6
- upgini-1.2.48/src/upgini/__about__.py +0 -1
- {upgini-1.2.48 → upgini-1.2.50}/.gitignore +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/LICENSE +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/README.md +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/pyproject.toml +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/__init__.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/ads.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/autofe/all_operands.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/autofe/date.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/autofe/operand.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/errors.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/http.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/lazy_import.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/mdc/context.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/metadata.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/metrics.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/normalizer/normalize_utils.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/sampler/base.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/search_task.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/spinner.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/utils/Roboto-Regular.ttf +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/utils/format.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.2.48 → upgini-1.2.50}/src/upgini/version_validator.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.2.50"
|
|
@@ -37,12 +37,18 @@ from upgini.metadata import (
|
|
|
37
37
|
from upgini.resource_bundle import ResourceBundle, get_custom_bundle
|
|
38
38
|
from upgini.search_task import SearchTask
|
|
39
39
|
from upgini.utils.email_utils import EmailSearchKeyConverter
|
|
40
|
-
from upgini.utils.target_utils import
|
|
40
|
+
from upgini.utils.target_utils import (
|
|
41
|
+
balance_undersample,
|
|
42
|
+
balance_undersample_forced,
|
|
43
|
+
balance_undersample_time_series,
|
|
44
|
+
)
|
|
41
45
|
|
|
42
46
|
try:
|
|
43
47
|
from upgini.utils.progress_bar import CustomProgressBar as ProgressBar
|
|
44
48
|
except Exception:
|
|
45
|
-
from upgini.utils.fallback_progress_bar import
|
|
49
|
+
from upgini.utils.fallback_progress_bar import (
|
|
50
|
+
CustomFallbackProgressBar as ProgressBar,
|
|
51
|
+
)
|
|
46
52
|
|
|
47
53
|
|
|
48
54
|
class Dataset: # (pd.DataFrame):
|
|
@@ -347,7 +353,8 @@ class Dataset: # (pd.DataFrame):
|
|
|
347
353
|
key
|
|
348
354
|
for search_group in self.search_keys_checked
|
|
349
355
|
for key in search_group
|
|
350
|
-
if
|
|
356
|
+
if key in self.columns_renaming
|
|
357
|
+
and not self.columns_renaming.get(key).endswith(EmailSearchKeyConverter.ONE_DOMAIN_SUFFIX)
|
|
351
358
|
}
|
|
352
359
|
ipv4_column = self.etalon_def_checked.get(FileColumnMeaningType.IP_ADDRESS.value)
|
|
353
360
|
if (
|
|
@@ -400,7 +400,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
400
400
|
remove_outliers_calc_metrics: Optional[bool] = None,
|
|
401
401
|
progress_callback: Optional[Callable[[SearchProgress], Any]] = None,
|
|
402
402
|
search_id_callback: Optional[Callable[[str], Any]] = None,
|
|
403
|
-
select_features: bool =
|
|
403
|
+
select_features: bool = True,
|
|
404
404
|
**kwargs,
|
|
405
405
|
):
|
|
406
406
|
"""Fit to data.
|
|
@@ -543,7 +543,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
543
543
|
estimator: Optional[Any] = None,
|
|
544
544
|
remove_outliers_calc_metrics: Optional[bool] = None,
|
|
545
545
|
progress_callback: Optional[Callable[[SearchProgress], Any]] = None,
|
|
546
|
-
select_features: bool =
|
|
546
|
+
select_features: bool = True,
|
|
547
547
|
**kwargs,
|
|
548
548
|
) -> pd.DataFrame:
|
|
549
549
|
"""Fit to data, then transform it.
|
|
@@ -1486,8 +1486,8 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1486
1486
|
for c in X_sampled.columns.to_list()
|
|
1487
1487
|
if (
|
|
1488
1488
|
not self.fit_select_features
|
|
1489
|
-
or c in set(self.feature_names_).union(self.id_columns)
|
|
1490
|
-
or (self.fit_columns_renaming or {}).get(c, c) in set(self.feature_names_).union(self.id_columns)
|
|
1489
|
+
or c in set(self.feature_names_).union(self.id_columns or [])
|
|
1490
|
+
or (self.fit_columns_renaming or {}).get(c, c) in set(self.feature_names_).union(self.id_columns or [])
|
|
1491
1491
|
)
|
|
1492
1492
|
and c
|
|
1493
1493
|
not in (
|
|
@@ -2619,6 +2619,11 @@ if response.status_code == 200:
|
|
|
2619
2619
|
self.generate_features = checked_generate_features
|
|
2620
2620
|
self.runtime_parameters.properties["generate_features"] = ",".join(self.generate_features)
|
|
2621
2621
|
|
|
2622
|
+
if self.id_columns is not None:
|
|
2623
|
+
for id_column in self.id_columns:
|
|
2624
|
+
if id_column not in validated_X.columns:
|
|
2625
|
+
raise ValidationError(self.bundle.get("missing_id_column").format(id_column))
|
|
2626
|
+
|
|
2622
2627
|
validate_scoring_argument(scoring)
|
|
2623
2628
|
|
|
2624
2629
|
self.__log_debug_information(
|
|
@@ -3742,7 +3747,8 @@ if response.status_code == 200:
|
|
|
3742
3747
|
self.feature_names_.append(feature_meta.name)
|
|
3743
3748
|
self.feature_importances_.append(_round_shap_value(feature_meta.shap_value))
|
|
3744
3749
|
|
|
3745
|
-
|
|
3750
|
+
df_for_sample = features_df if feature_meta.name in features_df.columns else self.X
|
|
3751
|
+
feature_info = FeatureInfo.from_metadata(feature_meta, df_for_sample, is_client_feature)
|
|
3746
3752
|
features_info.append(feature_info.to_row(self.bundle))
|
|
3747
3753
|
features_info_without_links.append(feature_info.to_row_without_links(self.bundle))
|
|
3748
3754
|
internal_features_info.append(feature_info.to_internal_row(self.bundle))
|
|
@@ -134,6 +134,7 @@ x_and_eval_x_diff_types=X and eval_set X has different types: {} and {}
|
|
|
134
134
|
baseline_score_column_not_exists=baseline_score_column {} doesn't exist in input dataframe
|
|
135
135
|
baseline_score_column_has_na=baseline_score_column contains NaN. Clear it and and retry
|
|
136
136
|
missing_features_for_transform=Missing some features for transform that were presented on fit: {}
|
|
137
|
+
missing_id_column=Id column {} not found in X
|
|
137
138
|
# target validation
|
|
138
139
|
empty_target=Target is empty in all rows
|
|
139
140
|
# non_numeric_target=Binary target should be numerical type
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
from dataclasses import dataclass
|
|
2
1
|
import itertools
|
|
3
|
-
from
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import Dict, List, Optional
|
|
4
4
|
|
|
5
5
|
import numpy as np
|
|
6
6
|
import pandas as pd
|
|
@@ -8,7 +8,6 @@ import pandas as pd
|
|
|
8
8
|
from upgini.metadata import FeaturesMetadataV2
|
|
9
9
|
from upgini.resource_bundle import ResourceBundle
|
|
10
10
|
|
|
11
|
-
|
|
12
11
|
LLM_SOURCE = "LLM with external data augmentation"
|
|
13
12
|
|
|
14
13
|
|
|
@@ -30,7 +29,9 @@ class FeatureInfo:
|
|
|
30
29
|
data_source_link: str
|
|
31
30
|
|
|
32
31
|
@staticmethod
|
|
33
|
-
def from_metadata(
|
|
32
|
+
def from_metadata(
|
|
33
|
+
feature_meta: FeaturesMetadataV2, data: Optional[pd.DataFrame], is_client_feature: bool
|
|
34
|
+
) -> "FeatureInfo":
|
|
34
35
|
return FeatureInfo(
|
|
35
36
|
name=_get_name(feature_meta),
|
|
36
37
|
internal_name=_get_internal_name(feature_meta),
|
|
@@ -86,8 +87,8 @@ class FeatureInfo:
|
|
|
86
87
|
}
|
|
87
88
|
|
|
88
89
|
|
|
89
|
-
def _get_feature_sample(feature_meta: FeaturesMetadataV2, data: pd.DataFrame) -> str:
|
|
90
|
-
if feature_meta.name in data.columns:
|
|
90
|
+
def _get_feature_sample(feature_meta: FeaturesMetadataV2, data: Optional[pd.DataFrame]) -> str:
|
|
91
|
+
if data is not None and feature_meta.name in data.columns:
|
|
91
92
|
feature_sample = np.random.choice(data[feature_meta.name].dropna().unique(), 3).tolist()
|
|
92
93
|
if len(feature_sample) > 0 and isinstance(feature_sample[0], float):
|
|
93
94
|
feature_sample = [round(f, 4) for f in feature_sample]
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.2.48"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|