upgini 1.2.86a2__py3-none-any.whl → 1.2.87a3857.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.86a2"
1
+ __version__ = "1.2.87a3857.dev1"
@@ -1664,6 +1664,10 @@ class FeaturesEnricher(TransformerMixin):
1664
1664
  enriched_eval_y_sorted,
1665
1665
  )
1666
1666
 
1667
+ fitting_X, fitting_enriched_X, fitting_eval_set_dict = self._convert_id_columns_to_int(
1668
+ fitting_X, fitting_enriched_X, fitting_eval_set_dict, columns_renaming
1669
+ )
1670
+
1667
1671
  return (
1668
1672
  validated_X,
1669
1673
  fitting_X,
@@ -1677,6 +1681,38 @@ class FeaturesEnricher(TransformerMixin):
1677
1681
  columns_renaming,
1678
1682
  )
1679
1683
 
1684
+ def _convert_id_columns_to_int(
1685
+ self,
1686
+ fitting_X: pd.DataFrame,
1687
+ fitting_enriched_X: pd.DataFrame,
1688
+ fitting_eval_set_dict: Dict[int, Tuple[pd.DataFrame, pd.Series]],
1689
+ columns_renaming: Dict[str, str] = {},
1690
+ ) -> pd.DataFrame:
1691
+ def _set_encoded(col_name: str, df: pd.DataFrame, slice: Tuple[int, int], combined_col: pd.Series):
1692
+ df[col_name] = combined_col.iloc[slice[0] : slice[1]]
1693
+ return slice[1]
1694
+
1695
+ inverse_columns_renaming = {v: k for k, v in columns_renaming.items()}
1696
+
1697
+ if self.id_columns:
1698
+ self.logger.info(f"Convert id columns to int: {self.id_columns}")
1699
+ for col in self.id_columns:
1700
+ col = inverse_columns_renaming.get(col, col)
1701
+ combined_col = pd.concat(
1702
+ [fitting_X[col], fitting_enriched_X[col]]
1703
+ + [eval_set_pair[0][col] for eval_set_pair in fitting_eval_set_dict.values()]
1704
+ )
1705
+ combined_col = combined_col.astype("category").cat.codes
1706
+ slice_end = _set_encoded(col, fitting_X, (0, len(fitting_X)), combined_col)
1707
+ slice_end = _set_encoded(
1708
+ col, fitting_enriched_X, (slice_end, slice_end + len(fitting_enriched_X)), combined_col
1709
+ )
1710
+ for eval_set_pair in fitting_eval_set_dict.values():
1711
+ slice_end = _set_encoded(
1712
+ col, eval_set_pair[0], (slice_end, slice_end + len(eval_set_pair[0])), combined_col
1713
+ )
1714
+ return fitting_X, fitting_enriched_X, fitting_eval_set_dict
1715
+
1680
1716
  @dataclass
1681
1717
  class _SampledDataForMetrics:
1682
1718
  X_sampled: pd.DataFrame
@@ -2204,10 +2240,12 @@ class FeaturesEnricher(TransformerMixin):
2204
2240
  {"name": name, "value": key_example(sk_type)} for name in sk_meta.unnestKeyNames
2205
2241
  ]
2206
2242
  else:
2207
- search_keys_with_values[sk_type.name] = [{
2208
- "name": sk_meta.originalName,
2209
- "value": key_example(sk_type),
2210
- }]
2243
+ search_keys_with_values[sk_type.name] = [
2244
+ {
2245
+ "name": sk_meta.originalName,
2246
+ "value": key_example(sk_type),
2247
+ }
2248
+ ]
2211
2249
 
2212
2250
  keys_section = json.dumps(search_keys_with_values)
2213
2251
  features_for_transform = self._search_task.get_features_for_transform()
@@ -3927,7 +3965,7 @@ if response.status_code == 200:
3927
3965
  if features_meta is None:
3928
3966
  raise Exception(self.bundle.get("missing_features_meta"))
3929
3967
 
3930
- return [f.name for f in features_meta if f.type == "categorical"]
3968
+ return [f.name for f in features_meta if f.type == "categorical" and f.name not in self.id_columns]
3931
3969
 
3932
3970
  def __prepare_feature_importances(
3933
3971
  self, trace_id: str, df: pd.DataFrame, updated_shaps: Optional[Dict[str, float]] = None, silent=False
upgini/metrics.py CHANGED
@@ -314,7 +314,7 @@ class EstimatorWrapper:
314
314
  self.groups = groups
315
315
  self.text_features = text_features
316
316
  self.logger = logger or logging.getLogger()
317
- self.droped_features = []
317
+ self.dropped_features = []
318
318
  self.converted_to_int = []
319
319
  self.converted_to_str = []
320
320
  self.converted_to_numeric = []
@@ -363,10 +363,11 @@ class EstimatorWrapper:
363
363
  x, y, groups = self._prepare_data(x, y, groups=self.groups)
364
364
 
365
365
  self.logger.info(f"Before preparing data columns: {x.columns.to_list()}")
366
- self.droped_features = []
366
+ self.dropped_features = []
367
367
  self.converted_to_int = []
368
368
  self.converted_to_str = []
369
369
  self.converted_to_numeric = []
370
+
370
371
  for c in x.columns:
371
372
 
372
373
  if _get_unique_count(x[c]) < 2:
@@ -374,7 +375,7 @@ class EstimatorWrapper:
374
375
  if c in self.cat_features:
375
376
  self.cat_features.remove(c)
376
377
  x.drop(columns=[c], inplace=True)
377
- self.droped_features.append(c)
378
+ self.dropped_features.append(c)
378
379
  elif self.text_features is not None and c in self.text_features:
379
380
  x[c] = x[c].astype(str)
380
381
  self.converted_to_str.append(c)
@@ -391,9 +392,7 @@ class EstimatorWrapper:
391
392
  self.converted_to_int.append(c)
392
393
  self.cat_features.remove(c)
393
394
  elif is_float_dtype(x[c]) or (x[c].dtype == "category" and is_float_dtype(x[c].cat.categories)):
394
- self.logger.info(
395
- f"Convert float cat feature {c} to string"
396
- )
395
+ self.logger.info(f"Convert float cat feature {c} to string")
397
396
  x[c] = x[c].astype(str)
398
397
  self.converted_to_str.append(c)
399
398
  elif x[c].dtype not in ["category", "int64"]:
@@ -411,16 +410,16 @@ class EstimatorWrapper:
411
410
  except (ValueError, TypeError):
412
411
  self.logger.warning(f"Remove feature {c} because it is not numeric and not in cat_features")
413
412
  x.drop(columns=[c], inplace=True)
414
- self.droped_features.append(c)
413
+ self.dropped_features.append(c)
415
414
 
416
415
  return x, y, groups, {}
417
416
 
418
417
  def _prepare_to_calculate(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, np.ndarray, dict]:
419
418
  x, y, _ = self._prepare_data(x, y)
420
419
 
421
- if self.droped_features:
422
- self.logger.info(f"Drop features on calculate metrics: {self.droped_features}")
423
- x = x.drop(columns=self.droped_features)
420
+ if self.dropped_features:
421
+ self.logger.info(f"Drop features on calculate metrics: {self.dropped_features}")
422
+ x = x.drop(columns=self.dropped_features)
424
423
 
425
424
  if self.converted_to_int:
426
425
  self.logger.info(f"Convert to int features on calculate metrics: {self.converted_to_int}")
@@ -763,7 +762,7 @@ class CatBoostWrapper(EstimatorWrapper):
763
762
  )
764
763
  for f in high_cardinality_features:
765
764
  self.text_features.remove(f)
766
- self.droped_features.append(f)
765
+ self.dropped_features.append(f)
767
766
  x = x.drop(columns=f, errors="ignore")
768
767
  return super().cross_val_predict(x, y, baseline_score_column)
769
768
  else:
@@ -853,7 +852,7 @@ class LightGBMWrapper(EstimatorWrapper):
853
852
  for c in x.columns:
854
853
  if x[c].dtype not in ["category", "int64", "float64", "bool"]:
855
854
  self.logger.warning(f"Feature {c} is not numeric and will be dropped")
856
- self.droped_features.append(c)
855
+ self.dropped_features.append(c)
857
856
  x = x.drop(columns=c, errors="ignore")
858
857
  return x, y_numpy, groups, params
859
858
 
@@ -933,7 +932,7 @@ class OtherEstimatorWrapper(EstimatorWrapper):
933
932
  for c in x.columns:
934
933
  if x[c].dtype not in ["category", "int64", "float64", "bool"]:
935
934
  self.logger.warning(f"Feature {c} is not numeric and will be dropped")
936
- self.droped_features.append(c)
935
+ self.dropped_features.append(c)
937
936
  x = x.drop(columns=c, errors="ignore")
938
937
  return x, y_numpy, groups, params
939
938
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.86a2
3
+ Version: 1.2.87a3857.dev1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,12 +1,12 @@
1
- upgini/__about__.py,sha256=yGLa0SZe61T_OjwHem32zlqsP2f3eCCrsj4uwsanjlA,25
1
+ upgini/__about__.py,sha256=3sSsl0Y82MTrFi1HkJZNIy6czqNHsAzjhFjDu3_mdew,33
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=fRtqSkXNONLnPe6cCL967GMt349FTIpXzy_u8LUKncw,35354
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=G0qbRPdlWe9p6cwYF3khP99-0kgAO8N0A2sfQxSLgmM,213446
6
+ upgini/features_enricher.py,sha256=G69F0tRPjKWMhNwvXK0dgHzyTSMHShGN0ycrtYge6kA,215354
7
7
  upgini/http.py,sha256=6Qcepv0tDC72mBBJxYHnA2xqw6QwFaKrXN8o4vju8Es,44372
8
8
  upgini/metadata.py,sha256=zt_9k0iQbWXuiRZcel4ORNPdQKt6Ou69ucZD_E1Q46o,12341
9
- upgini/metrics.py,sha256=3cip0_L6-OFew74KsRwzxJDU6UFq05h2v7IsyHLcMRc,43164
9
+ upgini/metrics.py,sha256=HJ5DpnrWAwrlw3_JlAWEhs1SXfI-_R4TGp2ajavOE14,43129
10
10
  upgini/search_task.py,sha256=Q5HjBpLIB3OCxAD1zNv5yQ3ZNJx696WCK_-H35_y7Rs,17912
11
11
  upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
12
12
  upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
@@ -70,7 +70,7 @@ upgini/utils/target_utils.py,sha256=LRN840dzx78-wg7ftdxAkp2c1eu8-JDvkACiRThm4HE,
70
70
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
71
71
  upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
72
72
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
73
- upgini-1.2.86a2.dist-info/METADATA,sha256=xU87Vnwtiae10PnJMUIC5KiOMP_TUEZ8BeafznKJxCg,49164
74
- upgini-1.2.86a2.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
75
- upgini-1.2.86a2.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
- upgini-1.2.86a2.dist-info/RECORD,,
73
+ upgini-1.2.87a3857.dev1.dist-info/METADATA,sha256=CAuubJghDMbrQnw0lkf8Go-cHO0uvsYaQYVel40FlLM,49172
74
+ upgini-1.2.87a3857.dev1.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
75
+ upgini-1.2.87a3857.dev1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
+ upgini-1.2.87a3857.dev1.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.25.0
2
+ Generator: hatchling 1.24.2
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any