upgini 1.1.197__tar.gz → 1.1.199__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- {upgini-1.1.197/src/upgini.egg-info → upgini-1.1.199}/PKG-INFO +7 -2
- {upgini-1.1.197 → upgini-1.1.199}/README.md +6 -1
- {upgini-1.1.197 → upgini-1.1.199}/setup.py +1 -1
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/features_enricher.py +9 -5
- {upgini-1.1.197 → upgini-1.1.199/src/upgini.egg-info}/PKG-INFO +7 -2
- {upgini-1.1.197 → upgini-1.1.199}/LICENSE +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/pyproject.toml +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/setup.cfg +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/__init__.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/ads.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/dataset.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/errors.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/fingerprint.js +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/http.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/mdc/context.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/metadata.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/metrics.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/normalizer/phone_normalizer.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/sampler/base.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/search_task.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/spinner.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/utils/format.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini/version_validator.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini.egg-info/SOURCES.txt +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini.egg-info/dependency_links.txt +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini.egg-info/requires.txt +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/src/upgini.egg-info/top_level.txt +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/tests/test_binary_dataset.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/tests/test_blocked_time_series.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/tests/test_categorical_dataset.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/tests/test_continuous_dataset.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/tests/test_country_utils.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/tests/test_custom_loss_utils.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/tests/test_datetime_utils.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/tests/test_email_utils.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/tests/test_etalon_validation.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/tests/test_features_enricher.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/tests/test_metrics.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/tests/test_phone_utils.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/tests/test_postal_code_utils.py +0 -0
- {upgini-1.1.197 → upgini-1.1.199}/tests/test_widget.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.1.197
|
|
3
|
+
Version: 1.1.199
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Home-page: https://upgini.com/
|
|
6
6
|
Author: Upgini Developers
|
|
@@ -96,7 +96,12 @@ License-File: LICENSE
|
|
|
96
96
|
<td><a href="https://en.wikipedia.org/wiki/Regression_analysis">☑️ regression</a></td>
|
|
97
97
|
<td><a href="https://en.wikipedia.org/wiki/Time_series#Prediction_and_forecasting">☑️ time series prediction</a></td>
|
|
98
98
|
</tr>
|
|
99
|
-
</table>
|
|
99
|
+
</table>
|
|
100
|
+
|
|
101
|
+
⭐️ [Simple Drag & Drop Search UI](https://appwidget-uragwvgykrk4sbmropphpy.streamlit.app/~/+/):
|
|
102
|
+
<a href="https://appwidget-uragwvgykrk4sbmropphpy.streamlit.app/~/+/">
|
|
103
|
+
<img width="710" alt="Drag & Drop Search UI" src="https://github.com/upgini/upgini/assets/95645411/36b6460c-51f3-400e-9f04-445b938bf45e">
|
|
104
|
+
</a>
|
|
100
105
|
|
|
101
106
|
|
|
102
107
|
## 🌎 Connected data sources and coverage
|
|
@@ -66,7 +66,12 @@
|
|
|
66
66
|
<td><a href="https://en.wikipedia.org/wiki/Regression_analysis">☑️ regression</a></td>
|
|
67
67
|
<td><a href="https://en.wikipedia.org/wiki/Time_series#Prediction_and_forecasting">☑️ time series prediction</a></td>
|
|
68
68
|
</tr>
|
|
69
|
-
</table>
|
|
69
|
+
</table>
|
|
70
|
+
|
|
71
|
+
⭐️ [Simple Drag & Drop Search UI](https://appwidget-uragwvgykrk4sbmropphpy.streamlit.app/~/+/):
|
|
72
|
+
<a href="https://appwidget-uragwvgykrk4sbmropphpy.streamlit.app/~/+/">
|
|
73
|
+
<img width="710" alt="Drag & Drop Search UI" src="https://github.com/upgini/upgini/assets/95645411/36b6460c-51f3-400e-9f04-445b938bf45e">
|
|
74
|
+
</a>
|
|
70
75
|
|
|
71
76
|
|
|
72
77
|
## 🌎 Connected data sources and coverage
|
|
@@ -13,7 +13,7 @@ from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
|
|
|
13
13
|
|
|
14
14
|
import numpy as np
|
|
15
15
|
import pandas as pd
|
|
16
|
-
from pandas.api.types import is_string_dtype
|
|
16
|
+
from pandas.api.types import is_numeric_dtype, is_string_dtype
|
|
17
17
|
from scipy.stats import ks_2samp
|
|
18
18
|
from sklearn.base import TransformerMixin
|
|
19
19
|
from sklearn.exceptions import NotFittedError
|
|
@@ -946,7 +946,9 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
946
946
|
bundle.get("quality_metrics_rows_header"): _num_samples(fitting_X),
|
|
947
947
|
# bundle.get("quality_metrics_match_rate_header"): self._search_task.initial_max_hit_rate_v2(),
|
|
948
948
|
}
|
|
949
|
-
if model_task_type in [ModelTaskType.BINARY, ModelTaskType.REGRESSION]
|
|
949
|
+
if model_task_type in [ModelTaskType.BINARY, ModelTaskType.REGRESSION] and is_numeric_dtype(
|
|
950
|
+
y_sorted
|
|
951
|
+
):
|
|
950
952
|
train_metrics[bundle.get("quality_metrics_mean_target_header")] = round(y_sorted.mean(), 4)
|
|
951
953
|
if etalon_metric is not None:
|
|
952
954
|
train_metrics[bundle.get("quality_metrics_baseline_header").format(metric)] = etalon_metric
|
|
@@ -1008,7 +1010,9 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1008
1010
|
bundle.get("quality_metrics_rows_header"): _num_samples(eval_X_sorted),
|
|
1009
1011
|
# bundle.get("quality_metrics_match_rate_header"): eval_hit_rate,
|
|
1010
1012
|
}
|
|
1011
|
-
if model_task_type in [ModelTaskType.BINARY, ModelTaskType.REGRESSION]
|
|
1013
|
+
if model_task_type in [ModelTaskType.BINARY, ModelTaskType.REGRESSION] and is_numeric_dtype(
|
|
1014
|
+
eval_y_sorted
|
|
1015
|
+
):
|
|
1012
1016
|
eval_metrics[bundle.get("quality_metrics_mean_target_header")] = round(
|
|
1013
1017
|
eval_y_sorted.mean(), 4
|
|
1014
1018
|
)
|
|
@@ -1204,7 +1208,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1204
1208
|
self.logger.info("No external features selected. So use only input datasets for metrics calculation")
|
|
1205
1209
|
X_sampled, search_keys = self._extend_x(validated_X, is_demo_dataset)
|
|
1206
1210
|
y_sampled = validated_y
|
|
1207
|
-
enriched_X =
|
|
1211
|
+
enriched_X = X_sampled
|
|
1208
1212
|
if eval_set is not None:
|
|
1209
1213
|
for idx in range(len(eval_set)):
|
|
1210
1214
|
eval_X_sampled, _ = self._extend_x(eval_set[idx][0], is_demo_dataset)
|
|
@@ -2315,7 +2319,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2315
2319
|
do_without_pandas_limits(print_datasets_sample)
|
|
2316
2320
|
|
|
2317
2321
|
maybe_date_col = self._get_date_column(self.search_keys)
|
|
2318
|
-
if X is not None and maybe_date_col is not None:
|
|
2322
|
+
if X is not None and maybe_date_col is not None and maybe_date_col in X.columns:
|
|
2319
2323
|
min_date = X[maybe_date_col].min()
|
|
2320
2324
|
max_date = X[maybe_date_col].max()
|
|
2321
2325
|
self.logger.info(f"Dates interval is ({min_date}, {max_date})")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.1.197
|
|
3
|
+
Version: 1.1.199
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Home-page: https://upgini.com/
|
|
6
6
|
Author: Upgini Developers
|
|
@@ -96,7 +96,12 @@ License-File: LICENSE
|
|
|
96
96
|
<td><a href="https://en.wikipedia.org/wiki/Regression_analysis">☑️ regression</a></td>
|
|
97
97
|
<td><a href="https://en.wikipedia.org/wiki/Time_series#Prediction_and_forecasting">☑️ time series prediction</a></td>
|
|
98
98
|
</tr>
|
|
99
|
-
</table>
|
|
99
|
+
</table>
|
|
100
|
+
|
|
101
|
+
⭐️ [Simple Drag & Drop Search UI](https://appwidget-uragwvgykrk4sbmropphpy.streamlit.app/~/+/):
|
|
102
|
+
<a href="https://appwidget-uragwvgykrk4sbmropphpy.streamlit.app/~/+/">
|
|
103
|
+
<img width="710" alt="Drag & Drop Search UI" src="https://github.com/upgini/upgini/assets/95645411/36b6460c-51f3-400e-9f04-445b938bf45e">
|
|
104
|
+
</a>
|
|
100
105
|
|
|
101
106
|
|
|
102
107
|
## 🌎 Connected data sources and coverage
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|