upgini 1.2.23__tar.gz → 1.2.24a2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {upgini-1.2.23 → upgini-1.2.24a2}/PKG-INFO +1 -1
- upgini-1.2.24a2/src/upgini/__about__.py +1 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/metrics.py +3 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/resource_bundle/strings.properties +1 -1
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/deduplicate_utils.py +2 -2
- upgini-1.2.23/src/upgini/__about__.py +0 -1
- {upgini-1.2.23 → upgini-1.2.24a2}/.gitignore +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/LICENSE +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/README.md +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/pyproject.toml +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/__init__.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/ads.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/autofe/all_operands.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/autofe/date.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/autofe/operand.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/dataset.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/errors.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/features_enricher.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/http.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/lazy_import.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/mdc/context.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/metadata.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/normalizer/normalize_utils.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/sampler/base.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/search_task.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/spinner.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/format.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/version_validator.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.2.24a2"
|
|
@@ -273,6 +273,9 @@ class EstimatorWrapper:
|
|
|
273
273
|
else:
|
|
274
274
|
x, y = self._remove_empty_target_rows(x, y)
|
|
275
275
|
|
|
276
|
+
# Make order of columns idempotent
|
|
277
|
+
x = x[sorted(x.columns)]
|
|
278
|
+
|
|
276
279
|
self.logger.info(f"After preparing data columns: {x.columns.to_list()}")
|
|
277
280
|
return x, y, groups
|
|
278
281
|
|
|
@@ -171,7 +171,7 @@ dataset_invalid_column_type=Unsupported data type of column {}: {}
|
|
|
171
171
|
dataset_invalid_filter=Unknown field in filter_features. Should be {'min_importance', 'max_psi', 'max_count', 'selected_features'}.
|
|
172
172
|
dataset_too_big_file=Too big size of dataframe X for processing. Please reduce number of rows or columns
|
|
173
173
|
dataset_transform_diff_fit=You try to enrich dataset that column names are different from the train dataset column names that you used on the fit stage. Please make the column names the same as in the train dataset and restart.
|
|
174
|
-
binary_small_dataset=The least populated class in Target contains less than 1000 rows.\nSmall numbers of observations may negatively affect the number of selected features and quality of your ML model.\nUpgini recommends you increase the number of observations in the least populated class
|
|
174
|
+
binary_small_dataset=The least populated class in Target contains less than 1000 rows.\nSmall numbers of observations may negatively affect the number of selected features and quality of your ML model.\nUpgini recommends you increase the number of observations in the least populated class.\n
|
|
175
175
|
all_search_keys_invalid=All search keys are invalid
|
|
176
176
|
all_emails_invalid=All values in column {} are invalid emails # Metrics validation
|
|
177
177
|
metrics_msle_negative_target=Mean Squared Logarithmic Error cannot be used when y contain negative values
|
|
@@ -24,7 +24,7 @@ def remove_fintech_duplicates(
|
|
|
24
24
|
date_format: Optional[str] = None,
|
|
25
25
|
logger: Optional[Logger] = None,
|
|
26
26
|
bundle: ResourceBundle = None,
|
|
27
|
-
) ->
|
|
27
|
+
) -> Tuple[pd.DataFrame, Optional[List[str]]]:
|
|
28
28
|
# Initial checks for target type and date column
|
|
29
29
|
bundle = bundle or get_custom_bundle()
|
|
30
30
|
if logger is None:
|
|
@@ -60,7 +60,7 @@ def remove_fintech_duplicates(
|
|
|
60
60
|
|
|
61
61
|
warning_messages = []
|
|
62
62
|
|
|
63
|
-
def process_df(segment_df: pd.DataFrame, eval_index=0) ->
|
|
63
|
+
def process_df(segment_df: pd.DataFrame, eval_index=0) -> Tuple[pd.DataFrame, Optional[str]]:
|
|
64
64
|
"""Process a subset of the dataset to remove duplicates based on personal keys."""
|
|
65
65
|
# Fast check for duplicates based on personal keys
|
|
66
66
|
if not segment_df[personal_cols].duplicated().any():
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.2.23"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|