upgini 1.2.13a4__tar.gz → 1.2.14a1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- {upgini-1.2.13a4 → upgini-1.2.14a1}/PKG-INFO +1 -1
- upgini-1.2.14a1/src/upgini/__about__.py +1 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/autofe/binary.py +1 -5
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/dataset.py +1 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/utils/target_utils.py +22 -18
- upgini-1.2.13a4/src/upgini/__about__.py +0 -1
- {upgini-1.2.13a4 → upgini-1.2.14a1}/.gitignore +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/LICENSE +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/README.md +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/pyproject.toml +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/__init__.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/ads.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/autofe/all_operands.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/autofe/date.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/autofe/operand.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/errors.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/features_enricher.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/http.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/lazy_import.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/mdc/context.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/metadata.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/metrics.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/normalizer/normalize_utils.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/sampler/base.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/search_task.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/spinner.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/utils/format.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.2.13a4 → upgini-1.2.14a1}/src/upgini/version_validator.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.2.14a1"
|
|
@@ -140,13 +140,9 @@ class Distance(PandasOperand):
|
|
|
140
140
|
has_symmetry_importance: bool = True
|
|
141
141
|
|
|
142
142
|
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
143
|
-
# Handle None values by replacing them with 0 in the dot product and norm calculations
|
|
144
|
-
left = left.apply(lambda x: np.array(x) if x is not None else np.zeros_like(right[0]))
|
|
145
|
-
right = right.apply(lambda x: np.array(x) if x is not None else np.zeros_like(left[0]))
|
|
146
|
-
|
|
147
143
|
return pd.Series(
|
|
148
144
|
1 - self.__dot(left, right) / (self.__norm(left) * self.__norm(right)), index=left.index
|
|
149
|
-
)
|
|
145
|
+
).astype(np.float64)
|
|
150
146
|
|
|
151
147
|
# row-wise dot product, handling None values
|
|
152
148
|
def __dot(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
@@ -253,6 +253,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
253
253
|
min_class_percent = self.IMBALANCE_THESHOLD / target_classes_count
|
|
254
254
|
min_class_threshold = min_class_percent * count
|
|
255
255
|
|
|
256
|
+
# If min class count less than 30% for binary or (60 / classes_count)% for multiclass
|
|
256
257
|
if min_class_count < min_class_threshold:
|
|
257
258
|
self.imbalanced = True
|
|
258
259
|
self.data = balance_undersample(
|
|
@@ -150,30 +150,34 @@ def balance_undersample(
|
|
|
150
150
|
# fill up to min_sample_threshold by majority class
|
|
151
151
|
minority_class = df[df[target_column] == min_class_value]
|
|
152
152
|
majority_class = df[df[target_column] != min_class_value]
|
|
153
|
-
sample_size = min(len(majority_class), min_sample_threshold - min_class_count)
|
|
153
|
+
# sample_size = min(len(majority_class), min_sample_threshold - min_class_count)
|
|
154
|
+
sample_size = min(
|
|
155
|
+
max_class_count,
|
|
156
|
+
binary_bootstrap_loops * (min_class_count + max(min_sample_threshold - 2 * min_class_count, 0)),
|
|
157
|
+
)
|
|
154
158
|
sampled_majority_class = majority_class.sample(n=sample_size, random_state=random_state)
|
|
155
159
|
resampled_data = df[
|
|
156
160
|
(df[SYSTEM_RECORD_ID].isin(minority_class[SYSTEM_RECORD_ID]))
|
|
157
161
|
| (df[SYSTEM_RECORD_ID].isin(sampled_majority_class[SYSTEM_RECORD_ID]))
|
|
158
162
|
]
|
|
159
163
|
|
|
160
|
-
elif max_class_count > min_class_count * binary_bootstrap_loops:
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
164
|
+
# elif max_class_count > min_class_count * binary_bootstrap_loops:
|
|
165
|
+
# msg = bundle.get("dataset_rarest_class_less_threshold").format(
|
|
166
|
+
# min_class_value, min_class_count, min_class_threshold, min_class_percent * 100
|
|
167
|
+
# )
|
|
168
|
+
# logger.warning(msg)
|
|
169
|
+
# print(msg)
|
|
170
|
+
# if warning_counter:
|
|
171
|
+
# warning_counter.increment()
|
|
172
|
+
|
|
173
|
+
# sampler = RandomUnderSampler(
|
|
174
|
+
# sampling_strategy={max_class_value: binary_bootstrap_loops * min_class_count}, random_state=random_state
|
|
175
|
+
# )
|
|
176
|
+
# X = df[SYSTEM_RECORD_ID]
|
|
177
|
+
# X = X.to_frame(SYSTEM_RECORD_ID)
|
|
178
|
+
# new_x, _ = sampler.fit_resample(X, target) # type: ignore
|
|
179
|
+
|
|
180
|
+
# resampled_data = df[df[SYSTEM_RECORD_ID].isin(new_x[SYSTEM_RECORD_ID])]
|
|
177
181
|
|
|
178
182
|
logger.info(f"Shape after rebalance resampling: {resampled_data}")
|
|
179
183
|
return resampled_data
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.2.13a4"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|