upgini 1.1.280a3418.post2__py3-none-any.whl → 1.1.280.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.1.280a3418-2"
1
+ __version__ = "1.1.280.dev0"
@@ -4,7 +4,7 @@ from upgini.autofe.binary import Add, Divide, Max, Min, Multiply, Sim, Subtract
4
4
  from upgini.autofe.date import DateDiff, DateDiffType2, DateListDiff, DateListDiffBounded
5
5
  from upgini.autofe.groupby import GroupByThenAgg, GroupByThenRank
6
6
  from upgini.autofe.operand import Operand
7
- from upgini.autofe.unary import Abs, Bin, Floor, Freq, Log, Residual, Sigmoid, Sqrt, Square
7
+ from upgini.autofe.unary import Abs, Floor, Freq, Log, Residual, Sigmoid, Sqrt, Square
8
8
  from upgini.autofe.vector import Mean, Sum
9
9
 
10
10
  ALL_OPERANDS: Dict[str, Operand] = {
@@ -49,7 +49,6 @@ ALL_OPERANDS: Dict[str, Operand] = {
49
49
  DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=30, upper_bound=45),
50
50
  DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=45, upper_bound=60),
51
51
  DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=60),
52
- Bin(),
53
52
  ]
54
53
  }
55
54
 
upgini/autofe/unary.py CHANGED
@@ -111,21 +111,3 @@ class Freq(PandasOperand):
111
111
  def calculate_unary(self, data: pd.Series) -> pd.Series:
112
112
  value_counts = data.value_counts(normalize=True)
113
113
  return self._loc(data, value_counts)
114
-
115
-
116
- class Bin(PandasOperand):
117
- name = "bin"
118
- is_unary = True
119
- output_type = "int"
120
- input_type = "discrete"
121
-
122
- zero_bound_low: int
123
- zero_bound_high: int
124
- step: int
125
-
126
- def calculate_unary(self, data: pd.Series) -> pd.Series:
127
- res = pd.Series(np.zeros(data.shape), index=data.index, dtype="int")
128
- res.update((data[data < self.zero_bound_low] - self.zero_bound_low) // self.step)
129
- res.update((data[data >= self.zero_bound_high] - self.zero_bound_high) // self.step + 1)
130
-
131
- return res
@@ -1333,6 +1333,9 @@ class FeaturesEnricher(TransformerMixin):
1333
1333
  excluding_search_keys = list(search_keys.keys())
1334
1334
  if search_keys_for_metrics is not None and len(search_keys_for_metrics) > 0:
1335
1335
  excluding_search_keys = [sk for sk in excluding_search_keys if sk not in search_keys_for_metrics]
1336
+ meta = self._search_task.get_all_features_metadata_v2()
1337
+ zero_importance_client_features = [m for m in meta if m.source == "etalon" and m.shap_value == 0.0]
1338
+
1336
1339
  client_features = [
1337
1340
  c
1338
1341
  for c in X_sampled.columns.to_list()
@@ -1341,6 +1344,7 @@ class FeaturesEnricher(TransformerMixin):
1341
1344
  excluding_search_keys
1342
1345
  + list(self.fit_dropped_features)
1343
1346
  + [DateTimeSearchKeyConverter.DATETIME_COL, SYSTEM_RECORD_ID]
1347
+ + zero_importance_client_features
1344
1348
  )
1345
1349
  ]
1346
1350
 
@@ -2845,8 +2849,10 @@ class FeaturesEnricher(TransformerMixin):
2845
2849
  maybe_date_col = self._get_date_column(self.search_keys)
2846
2850
  if X is not None and maybe_date_col is not None and maybe_date_col in X.columns:
2847
2851
  # TODO cast date column to single dtype
2848
- min_date = X[maybe_date_col].min()
2849
- max_date = X[maybe_date_col].max()
2852
+ date_converter = DateTimeSearchKeyConverter(maybe_date_col, self.date_format)
2853
+ converted_X = date_converter.convert(X)
2854
+ min_date = converted_X[maybe_date_col].min()
2855
+ max_date = converted_X[maybe_date_col].max()
2850
2856
  self.logger.info(f"Dates interval is ({min_date}, {max_date})")
2851
2857
 
2852
2858
  except Exception:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.1.280a3418.post2
3
+ Version: 1.1.280.dev0
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,9 +1,9 @@
1
- upgini/__about__.py,sha256=fgIudZxw-Qs9Gp5uX0LE_GUvruwCsSuF1VAhqXKXP80,31
1
+ upgini/__about__.py,sha256=UrHon9iXf0_L0ezeavtXrVCL8ABnaX0u5iXp6FC3PTA,29
2
2
  upgini/__init__.py,sha256=asENHgEVHQBIkV-e_0IhE_ZWqkCG6398U3ZLrNzAH6k,407
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=uiFY-P8te7-zigib1hGWRtW5v0X7chxPM0hJFdixAN8,45623
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=NU2lpp6ZrJ3oKOOLa6u7DQ5kb64n8mDAFXadWjr219A,176290
6
+ upgini/features_enricher.py,sha256=HQHTKpyK9MGqYTZ5FMgJLvRQ0o1JwApcQklTRKxIgMs,176683
7
7
  upgini/http.py,sha256=khrYSldpY-HbVLCcApfV1BjBFK6Uyuatb4colKybxgY,42301
8
8
  upgini/metadata.py,sha256=CFJekYGD7Ep7pRFH7wCEcsXS4bz83do33FNmtcCY9P4,9729
9
9
  upgini/metrics.py,sha256=L4LKSMOK9iKFLaJvTBTKk2tQauMgiJqtfrBclM3fBjs,29670
@@ -13,13 +13,13 @@ upgini/version_validator.py,sha256=RGg87VweujTNlibgsOuqPLIEiBgIOkuXNVTGuNCD234,1
13
13
  upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9JvfkvgE,50
14
14
  upgini/ads_management/ads_manager.py,sha256=igVbN2jz80Umb2BUJixmJVj-zx8unoKpecVo-R-nGdw,2648
15
15
  upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
- upgini/autofe/all_operands.py,sha256=VB8-iuNanjtjo3ho2shDDfWQtXGYekeoPEBzpNy1iWo,2377
16
+ upgini/autofe/all_operands.py,sha256=SyKVU-xGMHgoRZvHrCmba2u2Ygc73c1mXFolNSWe8Uo,2357
17
17
  upgini/autofe/binary.py,sha256=441BRuqMsxlxuw4c8rMZB6h5EpRdVMk-bVa03U7T5Hg,3973
18
18
  upgini/autofe/date.py,sha256=Vy1I92fLLYLhuYKJmtuPBMI8cPxE4Uwk40hqE2F2e1A,4224
19
19
  upgini/autofe/feature.py,sha256=ChSuuIbRPGIWnPjKAgZbeAEi7Y_PjSVRyxxx41MyFp0,11845
20
20
  upgini/autofe/groupby.py,sha256=4WjDzQxqpZxB79Ih4ihMMI5GDxaFqiH6ZelfV82ClT4,3091
21
21
  upgini/autofe/operand.py,sha256=xgEIZuFCfckc6LpBqVu1OVK3JEabm1O-LHUsp83EHKA,2806
22
- upgini/autofe/unary.py,sha256=tbyptbPZubVNdvo36N7ftYIlFOUhQLZyJLizjIUOTJ4,3673
22
+ upgini/autofe/unary.py,sha256=v-l3aiE5hj6kurvh6adCQL8W3X9u9a7RVbS_WPR2qlw,3146
23
23
  upgini/autofe/vector.py,sha256=dLxfAstJs-gw_OQ1xxoxcM6pVzORlV0HVzdzt7cLXVQ,606
24
24
  upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
25
  upgini/data_source/data_source_publisher.py,sha256=taRzyGgrPrTTSGw4Y-Ca5k4bf30aiTa68rxqT9zfqeI,16478
@@ -56,7 +56,7 @@ upgini/utils/sklearn_ext.py,sha256=c23MGSUVfxLnaDWKAxavHgnOtm5dGKkF3YswdWQcFzs,4
56
56
  upgini/utils/target_utils.py,sha256=Y96_PJ5cC-WsEbeqg20v9uqywDQobLoTb-xoP7S3o4E,7807
57
57
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
58
58
  upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
59
- upgini-1.1.280a3418.post2.dist-info/METADATA,sha256=lZ6l4l61_krq4P0sat3HwUocoSzaRh3NNSpq_bNa46Q,48129
60
- upgini-1.1.280a3418.post2.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
61
- upgini-1.1.280a3418.post2.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
62
- upgini-1.1.280a3418.post2.dist-info/RECORD,,
59
+ upgini-1.1.280.dev0.dist-info/METADATA,sha256=AlD_LoKBQRcf5DSTcQeTung2oC2xtzCzKnBqgMeVUH0,48123
60
+ upgini-1.1.280.dev0.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
61
+ upgini-1.1.280.dev0.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
62
+ upgini-1.1.280.dev0.dist-info/RECORD,,