upgini 1.2.9a110__tar.gz → 1.2.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- {upgini-1.2.9a110 → upgini-1.2.10}/PKG-INFO +1 -2
- {upgini-1.2.9a110 → upgini-1.2.10}/pyproject.toml +0 -1
- upgini-1.2.10/src/upgini/__about__.py +1 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/features_enricher.py +13 -8
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/metrics.py +2 -1
- upgini-1.2.9a110/src/upgini/__about__.py +0 -1
- {upgini-1.2.9a110 → upgini-1.2.10}/.gitignore +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/LICENSE +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/README.md +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/__init__.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/ads.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/autofe/all_operands.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/autofe/date.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/autofe/operand.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/dataset.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/errors.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/http.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/lazy_import.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/mdc/context.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/metadata.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/normalizer/normalize_utils.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/sampler/base.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/search_task.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/spinner.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/utils/format.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.2.9a110 → upgini-1.2.10}/src/upgini/version_validator.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.2.9a110
|
|
3
|
+
Version: 1.2.10
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
|
@@ -28,7 +28,6 @@ Requires-Dist: fastparquet>=0.8.1
|
|
|
28
28
|
Requires-Dist: ipywidgets>=8.1.0
|
|
29
29
|
Requires-Dist: jarowinkler>=2.0.0
|
|
30
30
|
Requires-Dist: levenshtein>=0.25.1
|
|
31
|
-
Requires-Dist: lightgbm>=3.3.2
|
|
32
31
|
Requires-Dist: numpy<=1.26.4,>=1.19.0
|
|
33
32
|
Requires-Dist: pandas<3.0.0,>=1.1.0
|
|
34
33
|
Requires-Dist: pydantic<3.0.0,>1.0.0
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.2.10"
|
|
@@ -1633,10 +1633,10 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1633
1633
|
|
|
1634
1634
|
rows_to_drop = None
|
|
1635
1635
|
has_date = SearchKey.find_key(search_keys, [SearchKey.DATE, SearchKey.DATETIME]) is not None
|
|
1636
|
-
|
|
1636
|
+
self.model_task_type = self.model_task_type or define_task(
|
|
1637
1637
|
self.df_with_original_index[TARGET], has_date, self.logger, silent=True
|
|
1638
1638
|
)
|
|
1639
|
-
if model_task_type == ModelTaskType.REGRESSION:
|
|
1639
|
+
if self.model_task_type == ModelTaskType.REGRESSION:
|
|
1640
1640
|
target_outliers_df = self._search_task.get_target_outliers(trace_id)
|
|
1641
1641
|
if target_outliers_df is not None and len(target_outliers_df) > 0:
|
|
1642
1642
|
outliers = pd.merge(
|
|
@@ -2391,12 +2391,12 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2391
2391
|
|
|
2392
2392
|
maybe_date_column = SearchKey.find_key(self.fit_search_keys, [SearchKey.DATE, SearchKey.DATETIME])
|
|
2393
2393
|
has_date = maybe_date_column is not None
|
|
2394
|
-
model_task_type = self.model_task_type or define_task(validated_y, has_date, self.logger)
|
|
2394
|
+
self.model_task_type = self.model_task_type or define_task(validated_y, has_date, self.logger)
|
|
2395
2395
|
|
|
2396
|
-
self._validate_binary_observations(validated_y, model_task_type)
|
|
2396
|
+
self._validate_binary_observations(validated_y, self.model_task_type)
|
|
2397
2397
|
|
|
2398
2398
|
self.runtime_parameters = get_runtime_params_custom_loss(
|
|
2399
|
-
self.loss, model_task_type, self.runtime_parameters, self.logger
|
|
2399
|
+
self.loss, self.model_task_type, self.runtime_parameters, self.logger
|
|
2400
2400
|
)
|
|
2401
2401
|
|
|
2402
2402
|
if validated_eval_set is not None and len(validated_eval_set) > 0:
|
|
@@ -2449,7 +2449,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2449
2449
|
if is_numeric_dtype(df[self.TARGET_NAME]) and has_date:
|
|
2450
2450
|
self._validate_PSI(df.sort_values(by=maybe_date_column))
|
|
2451
2451
|
|
|
2452
|
-
self.__adjust_cv(df, maybe_date_column, model_task_type)
|
|
2452
|
+
self.__adjust_cv(df, maybe_date_column, self.model_task_type)
|
|
2453
2453
|
|
|
2454
2454
|
normalizer = Normalizer(
|
|
2455
2455
|
self.fit_search_keys, self.fit_generated_features, self.bundle, self.logger, self.warning_counter
|
|
@@ -2557,7 +2557,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2557
2557
|
meaning_types=meaning_types,
|
|
2558
2558
|
search_keys=combined_search_keys,
|
|
2559
2559
|
unnest_search_keys=unnest_search_keys,
|
|
2560
|
-
model_task_type=model_task_type,
|
|
2560
|
+
model_task_type=self.model_task_type,
|
|
2561
2561
|
date_format=self.date_format,
|
|
2562
2562
|
random_state=self.random_state,
|
|
2563
2563
|
rest_client=self.rest_client,
|
|
@@ -2780,6 +2780,8 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2780
2780
|
raise ValidationError(self.bundle.get("x_contains_reserved_column_name").format(EVAL_SET_INDEX))
|
|
2781
2781
|
if SYSTEM_RECORD_ID in validated_X.columns:
|
|
2782
2782
|
raise ValidationError(self.bundle.get("x_contains_reserved_column_name").format(SYSTEM_RECORD_ID))
|
|
2783
|
+
if ENTITY_SYSTEM_RECORD_ID in validated_X.columns:
|
|
2784
|
+
raise ValidationError(self.bundle.get("x_contains_reserved_column_name").format(ENTITY_SYSTEM_RECORD_ID))
|
|
2783
2785
|
|
|
2784
2786
|
return validated_X
|
|
2785
2787
|
|
|
@@ -3760,7 +3762,10 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
3760
3762
|
display_html_dataframe(self.metrics, self.metrics, msg)
|
|
3761
3763
|
|
|
3762
3764
|
def __show_selected_features(self, search_keys: Dict[str, SearchKey]):
|
|
3763
|
-
|
|
3765
|
+
search_key_names = search_keys.keys()
|
|
3766
|
+
if self.fit_columns_renaming:
|
|
3767
|
+
search_key_names = [self.fit_columns_renaming.get(col, col) for col in search_key_names]
|
|
3768
|
+
msg = self.bundle.get("features_info_header").format(len(self.feature_names_), search_key_names)
|
|
3764
3769
|
|
|
3765
3770
|
try:
|
|
3766
3771
|
_ = get_ipython() # type: ignore
|
|
@@ -10,7 +10,6 @@ import catboost
|
|
|
10
10
|
import numpy as np
|
|
11
11
|
import pandas as pd
|
|
12
12
|
from catboost import CatBoostClassifier, CatBoostRegressor
|
|
13
|
-
from lightgbm import LGBMClassifier, LGBMRegressor
|
|
14
13
|
from numpy import log1p
|
|
15
14
|
from pandas.api.types import is_numeric_dtype
|
|
16
15
|
from sklearn.metrics import check_scoring, get_scorer, make_scorer, roc_auc_score
|
|
@@ -408,6 +407,8 @@ class EstimatorWrapper:
|
|
|
408
407
|
estimator = CatBoostWrapper(**kwargs)
|
|
409
408
|
else:
|
|
410
409
|
try:
|
|
410
|
+
from lightgbm import LGBMClassifier, LGBMRegressor
|
|
411
|
+
|
|
411
412
|
if isinstance(estimator, (LGBMClassifier, LGBMRegressor)):
|
|
412
413
|
estimator = LightGBMWrapper(**kwargs)
|
|
413
414
|
else:
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.2.9a110"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|