upgini 1.2.120__py3-none-any.whl → 1.2.121a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.120"
1
+ __version__ = "1.2.121a1"
@@ -519,21 +519,24 @@ class DataSourcePublisher:
519
519
  description: str = "",
520
520
  ):
521
521
  if model_type is not None and model_type not in ["ONNX", "CATBOOST"]:
522
- raise ValueError(f"Invalid model type: {model_type}. Available values: ONNX")
522
+ raise ValueError(f"Invalid model type: {model_type}. Available values: ONNX, CATBOOST")
523
523
  metadata = {
524
524
  "modelName": name,
525
525
  "inputNames": input_names,
526
526
  "dateColumn": date_column,
527
527
  "scoreName": score_name,
528
528
  "searchTaskId": search_id,
529
- "modelType": model_type or "ONNX",
529
+ "modelType": model_type or "CATBOOST",
530
530
  "description": description,
531
531
  }
532
532
 
533
533
  trace_id = str(uuid.uuid4())
534
534
  with MDC(trace_id=trace_id):
535
535
  try:
536
- self._rest_client.upload_autofe_model(file_path, metadata, trace_id)
536
+ result = self._rest_client.upload_autofe_model(file_path, metadata, trace_id)
537
+ if "ERROR" in result:
538
+ raise Exception(result)
539
+ print(result)
537
540
  except Exception:
538
541
  self.logger.exception("Failed to upload autofe model")
539
542
  raise
@@ -24,7 +24,7 @@ class FeaturesValidator:
24
24
  features_for_generate: Optional[List[str]] = None,
25
25
  columns_renaming: Optional[Dict[str, str]] = None,
26
26
  ) -> Tuple[List[str], List[str]]:
27
- # one_hot_encoded_features = []
27
+ one_hot_encoded_features = []
28
28
  empty_or_constant_features = []
29
29
  high_cardinality_features = []
30
30
  warnings = []
@@ -39,20 +39,17 @@ class FeaturesValidator:
39
39
  if most_frequent_percent >= 0.99:
40
40
  empty_or_constant_features.append(f)
41
41
 
42
- # TODO implement one-hot encoding check
43
- # if len(value_counts) == 1:
44
- # empty_or_constant_features.append(f)
45
- # elif most_frequent_percent >= 0.99:
46
- # empty_or_constant_features.append(f)
47
- # if set(value_counts.index.to_list()) == {0, 1}:
48
- # one_hot_encoded_features.append(f)
49
- # else:
50
- # empty_or_constant_features.append(f)
51
- # continue
52
-
53
- # if one_hot_encoded_features:
54
- # msg = bundle.get("one_hot_encoded_features").format(one_hot_encoded_features)
55
- # warnings.append(msg)
42
+ if len(value_counts) == 1:
43
+ empty_or_constant_features.append(f)
44
+ elif most_frequent_percent >= 0.99:
45
+ if self.is_one_hot_encoded(column):
46
+ one_hot_encoded_features.append(f)
47
+ else:
48
+ empty_or_constant_features.append(f)
49
+
50
+ if one_hot_encoded_features:
51
+ msg = bundle.get("one_hot_encoded_features").format(one_hot_encoded_features)
52
+ self.logger.info(msg)
56
53
 
57
54
  columns_renaming = columns_renaming or {}
58
55
 
@@ -102,3 +99,30 @@ class FeaturesValidator:
102
99
@staticmethod
def find_constant_features(df: pd.DataFrame) -> List[str]:
    """Collect the labels of columns whose ``nunique()`` is at most one.

    Columns are visited in the frame's own order, so the returned list
    preserves that order.
    """
    constant_columns = []
    for column_label in df.columns:
        distinct_count = df[column_label].nunique()
        if distinct_count <= 1:
            constant_columns.append(column_label)
    return constant_columns
102
+
103
@staticmethod
def is_one_hot_encoded(series: pd.Series) -> bool:
    """Tell whether ``series`` looks like a sparse one-hot (0/1) indicator column.

    The column qualifies when, after casting to float, its distinct values
    are exactly 0 and 1 (so numeric strings such as "0"/"1" are accepted)
    and 1 occurs strictly less often than 0.

    Returns:
        True for a 0/1 column whose minority class is 1. False when the
        column has missing values, any other value, only one distinct
        value, at least as many ones as zeros, or content that cannot be
        cast to float.
    """
    try:
        as_float = series.astype(float)
    except (ValueError, TypeError):
        # Non-numeric content: ValueError for arbitrary strings, TypeError
        # for objects float() cannot handle (e.g. pd.NA in an object column).
        return False

    # NaN never equals 0.0 or 1.0, so this single comparison also rejects
    # columns with missing values and columns holding only one of the two.
    if set(as_float.unique()) != {0.0, 1.0}:
        return False

    ones = int((as_float == 1.0).sum())
    zeros = len(as_float) - ones
    # Only treat the column as one-hot when 1 is the minority class
    return ones < zeros
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.120
3
+ Version: 1.2.121a1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,4 +1,4 @@
1
- upgini/__about__.py,sha256=C4MPkUjPY8txHqkpCAHzv554Bvc9hUrOFMic1aakSTI,24
1
+ upgini/__about__.py,sha256=lbqEfhDGdLuugmia7aJpwXt4xpDEZT5h_07_bMMutgk,26
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=pQ8JQe0cdygD-W9GefJmfE6bnj4EYzXsjlgWdIS9nS8,31578
@@ -31,7 +31,7 @@ upgini/autofe/timeseries/roll.py,sha256=zADKXU-eYWQnQ5R3am1yEal8uU6Tm0jLAixwPb_a
31
31
  upgini/autofe/timeseries/trend.py,sha256=K1_iw2ko_LIUU8YCUgrvN3n0MkHtsi7-63-8x9er1k4,2129
32
32
  upgini/autofe/timeseries/volatility.py,sha256=SvZfhM_ZAWCNpTf87WjSnZsnlblARgruDlu4By4Zvhc,8078
33
33
  upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
- upgini/data_source/data_source_publisher.py,sha256=suRmAF1i7yiZ8vJjpEKdVr5Wqtr7o1_vjAhaN9B4DU0,26518
34
+ upgini/data_source/data_source_publisher.py,sha256=qXQUYErhCmkWHm2FWgTL0FYZ2aJbxtSDV94OCM3eqUU,26653
35
35
  upgini/mdc/__init__.py,sha256=iHJlXQg6xRM1-ZOUtaPSJqw5SpQDszvxp4LyqviNLIQ,1027
36
36
  upgini/mdc/context.py,sha256=3u1B-jXt7tXEvNcV3qmR9SDCseudnY7KYsLclBdwVLk,1405
37
37
  upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -58,7 +58,7 @@ upgini/utils/display_utils.py,sha256=uSG3JwpwCIgRJXsp-8ktuJ0Dh-WFti7IrRLMUfHfoDc
58
58
  upgini/utils/email_utils.py,sha256=pZ2vCfNxLIPUhxr0-OlABNXm12jjU44isBk8kGmqQzA,5277
59
59
  upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
60
60
  upgini/utils/feature_info.py,sha256=6vihytwKma_TlXtTn4l6Aj4kqlOj0ouLy-yWVV6VUw8,7551
61
- upgini/utils/features_validator.py,sha256=lEfmk4DoxZ4ooOE1HC0ZXtUb_lFKRFHIrnFULZ4_rL8,3746
61
+ upgini/utils/features_validator.py,sha256=wkPQlQFK6EQdnOd1MxFCSmb8gEqzCYJX1isLPaeRsgU,4365
62
62
  upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
63
63
  upgini/utils/hash_utils.py,sha256=mP2yHyzvDNdpa5g3B4MHzulxBeEz_ZSoGl1YF_VnAyE,5538
64
64
  upgini/utils/ip_utils.py,sha256=wmnnwVQdjX9o1cNQw6VQMk6maHhvsq6hNsZBYf9knrw,6585
@@ -74,7 +74,7 @@ upgini/utils/target_utils.py,sha256=GCPn4QeJ83JJ_vyBJ3IhY5fyIRkLC9q9BE59S2FRO1I,
74
74
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
75
75
  upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
76
76
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
77
- upgini-1.2.120.dist-info/METADATA,sha256=KFxeOoYvqFTE347dhf5EmvIskXqWMZvxYWy3AAwOyWI,50743
78
- upgini-1.2.120.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
79
- upgini-1.2.120.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
80
- upgini-1.2.120.dist-info/RECORD,,
77
+ upgini-1.2.121a1.dist-info/METADATA,sha256=8lCLPlcxApmxxhl8DgplSrHe_Z_GHqIiOLB66OCabPo,50745
78
+ upgini-1.2.121a1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
79
+ upgini-1.2.121a1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
80
+ upgini-1.2.121a1.dist-info/RECORD,,