upgini 1.2.98a3594.dev7__py3-none-any.whl → 1.2.98a3594.dev9__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.98a3594.dev7"
1
+ __version__ = "1.2.98a3594.dev9"
upgini/autofe/feature.py CHANGED
@@ -18,9 +18,6 @@ class Column:
18
18
  self.data = data
19
19
  self.calculate_all = calculate_all
20
20
 
21
- def get_display_name(self, cache: bool = True, shorten: bool = False, **kwargs) -> str:
22
- return self.name
23
-
24
21
  def set_op_params(self, params: Dict[str, str], **kwargs) -> "Column":
25
22
  return self
26
23
 
@@ -37,8 +34,21 @@ class Column:
37
34
  def get_column_nodes(self) -> List["Column"]:
38
35
  return [self]
39
36
 
40
- def get_columns(self, **kwargs) -> List[str]:
41
- return [self.name]
37
+ def get_columns(self, unhash=False, **kwargs):
38
+ name = self.name
39
+ return [self._unhash(name) if unhash else name]
40
+
41
+ def get_display_name(self, cache: bool = True, shorten: bool = False, **kwargs) -> str:
42
+ return self.get_columns(**kwargs)[0]
43
+
44
+ def _unhash(self, feature_name: str) -> str:
45
+ last_component_idx = feature_name.rfind("_")
46
+ if not feature_name.startswith("f_"):
47
+ return feature_name # etalon feature
48
+ elif last_component_idx == 1:
49
+ return feature_name[2:] # fully hashed name, cannot unhash
50
+ else:
51
+ return feature_name[2:last_component_idx]
42
52
 
43
53
  @property
44
54
  def children(self) -> List[Union["Feature", "Column"]]:
@@ -91,7 +101,7 @@ class Feature:
91
101
  child_params = {
92
102
  k[len(child.get_display_name(**kwargs)) + 1 :]: v
93
103
  for k, v in params.items()
94
- if k.startswith(child.get_display_name())
104
+ if k.startswith(child.get_display_name(**kwargs))
95
105
  }
96
106
  if not child_params:
97
107
  child_params = params
@@ -4174,7 +4174,7 @@ if response.status_code == 200:
4174
4174
 
4175
4175
  description = {}
4176
4176
 
4177
- feature_meta = get_feature_by_name(autofe_feature.get_display_name(shorten=True))
4177
+ feature_meta = get_feature_by_name(autofe_feature.get_display_name(shorten=True, unhash=True))
4178
4178
  if feature_meta is None:
4179
4179
  self.logger.warning(f"Feature meta for display index {m.display_index} not found")
4180
4180
  continue
upgini/metrics.py CHANGED
@@ -399,14 +399,14 @@ class EstimatorWrapper:
399
399
  self.converted_to_str.append(c)
400
400
  elif c in self.cat_features:
401
401
  if x[c].dtype == "bool" or (x[c].dtype == "category" and x[c].cat.categories.dtype == "bool"):
402
- x[c] = x[c].astype(np.int64)
402
+ x[c] = x[c].astype(pd.Int64Dtype())
403
403
  self.converted_to_int.append(c)
404
404
  elif x[c].dtype == "category" and is_integer_dtype(x[c].cat.categories):
405
405
  self.logger.info(
406
406
  f"Convert categorical feature {c} with integer categories"
407
407
  " to int64 and remove from cat_features"
408
408
  )
409
- x[c] = x[c].astype(np.int64)
409
+ x[c] = x[c].astype(pd.Int64Dtype())
410
410
  self.converted_to_int.append(c)
411
411
  self.cat_features.remove(c)
412
412
  elif is_float_dtype(x[c]) or (x[c].dtype == "category" and is_float_dtype(x[c].cat.categories)):
@@ -419,7 +419,7 @@ class EstimatorWrapper:
419
419
  else:
420
420
  if x[c].dtype == "bool" or (x[c].dtype == "category" and x[c].cat.categories.dtype == "bool"):
421
421
  self.logger.info(f"Convert bool feature {c} to int64")
422
- x[c] = x[c].astype(np.int64)
422
+ x[c] = x[c].astype(pd.Int64Dtype())
423
423
  self.converted_to_int.append(c)
424
424
  elif not is_valid_numeric_array_data(x[c]) and not is_numeric_dtype(x[c]):
425
425
  try:
@@ -442,7 +442,7 @@ class EstimatorWrapper:
442
442
  if self.converted_to_int:
443
443
  self.logger.info(f"Convert to int features on calculate metrics: {self.converted_to_int}")
444
444
  for c in self.converted_to_int:
445
- x[c] = x[c].astype(np.int64)
445
+ x[c] = x[c].astype(pd.Int64Dtype())
446
446
 
447
447
  if self.converted_to_str:
448
448
  self.logger.info(f"Convert to str features on calculate metrics: {self.converted_to_str}")
@@ -896,7 +896,7 @@ class LightGBMWrapper(EstimatorWrapper):
896
896
  x[c] = x[c].astype("category")
897
897
 
898
898
  for c in x.columns:
899
- if x[c].dtype not in ["category", "int64", "float64", "bool"]:
899
+ if x[c].dtype not in ["category", "int64", "float64", "bool", "Int64"]:
900
900
  self.logger.warning(f"Feature {c} is not numeric and will be dropped")
901
901
  self.dropped_features.append(c)
902
902
  x = x.drop(columns=c, errors="ignore")
@@ -987,7 +987,7 @@ class OtherEstimatorWrapper(EstimatorWrapper):
987
987
  x[c] = x[c].astype("category")
988
988
  params["cat_features"] = self.cat_features
989
989
  for c in x.columns:
990
- if x[c].dtype not in ["category", "int64", "float64", "bool"]:
990
+ if x[c].dtype not in ["category", "int64", "float64", "bool", "Int64"]:
991
991
  self.logger.warning(f"Feature {c} is not numeric and will be dropped")
992
992
  self.dropped_features.append(c)
993
993
  x = x.drop(columns=c, errors="ignore")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.98a3594.dev7
3
+ Version: 1.2.98a3594.dev9
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,12 +1,12 @@
1
- upgini/__about__.py,sha256=Cv7Vb1EHNYQxAzPmCvniGxMKVZxzLRYgmTI7sXodNFA,33
1
+ upgini/__about__.py,sha256=r1QcyBGBiIjDj9sPC6KSm49ozEaBc-rKpwS3j6qlD1A,33
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=e6JDYTZ2AwC5aF-dqclKZKkiKrHo2f6cFmMQO2ZZmjM,32724
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=4rKoV-3jM876Fk0fM4XlnW3fLwXvk1KN2ymcwlAfPm0,219941
6
+ upgini/features_enricher.py,sha256=KSOEzO29nY79RIW0hdbf1qXQGxa3itKZ0PkcwVPPf9U,219954
7
7
  upgini/http.py,sha256=DNcoS7qdxG0mOJn6I8r6O5I6XdIJTdzDzW3hkz3NgG4,45443
8
8
  upgini/metadata.py,sha256=vsbbHyPCP3Rs8WkeDgQg99uAA_zmsbDStAT-NwDYhO4,12455
9
- upgini/metrics.py,sha256=UbKEsHB7XDzoyGNqDx846zbh1t65GpqdnnhViccdoKU,45615
9
+ upgini/metrics.py,sha256=gXr2aiw5j9QBWBo1hZp40Is679hef5q8MrT6LJfjsBk,45661
10
10
  upgini/search_task.py,sha256=Q5HjBpLIB3OCxAD1zNv5yQ3ZNJx696WCK_-H35_y7Rs,17912
11
11
  upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
12
12
  upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
@@ -16,7 +16,7 @@ upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
16
  upgini/autofe/all_operators.py,sha256=rdjF5eaE4bC6Q4eu_el5Z7ekYt8DjOFermz2bePPbUc,333
17
17
  upgini/autofe/binary.py,sha256=oOEECc4nRzZN2tYaiqx8F2XHnfWpk1bVvb7ZkZJ0lO8,7709
18
18
  upgini/autofe/date.py,sha256=MM1S-6imNSzCDOhbNnmsc_bwSqUWBcS8vWAdHF8j1kY,11134
19
- upgini/autofe/feature.py,sha256=cDoVLD1M33HHPVxGGimXmcHZhAG0yIaAOoddxvhA8U0,15596
19
+ upgini/autofe/feature.py,sha256=1jiy9_aiaQdVGIh5UbnIGF8St5BkiikOUh5KywMLYRY,16056
20
20
  upgini/autofe/groupby.py,sha256=IYmQV9uoCdRcpkeWZj_kI3ObzoNCNx3ff3h8sTL01tk,3603
21
21
  upgini/autofe/operator.py,sha256=EOffJw6vKXpEh5yymqb1RFNJPxGxmnHdFRo9dB5SCFo,4969
22
22
  upgini/autofe/unary.py,sha256=FFtvkQaT0cu_zPZ1jCLcsjik-UUh12qQFF3tUW8NqsE,6675
@@ -71,7 +71,7 @@ upgini/utils/target_utils.py,sha256=i3Xt5l9ybB2_nF_ma5cfPuL3OeFTs2dY2xDI0p4Azpg,
71
71
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
72
72
  upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
73
73
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
74
- upgini-1.2.98a3594.dev7.dist-info/METADATA,sha256=kYf9yIOvMvAhrwmSAs-s0OPEqP_rhFeNfguQ4ZHeVrA,49538
75
- upgini-1.2.98a3594.dev7.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
76
- upgini-1.2.98a3594.dev7.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
77
- upgini-1.2.98a3594.dev7.dist-info/RECORD,,
74
+ upgini-1.2.98a3594.dev9.dist-info/METADATA,sha256=yJExMJu_WwWBTYSbDIx1w455w1VxcYId_YLHvSgWJoA,49538
75
+ upgini-1.2.98a3594.dev9.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
76
+ upgini-1.2.98a3594.dev9.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
77
+ upgini-1.2.98a3594.dev9.dist-info/RECORD,,