upgini 1.1.275a1__py3-none-any.whl → 1.1.275a99__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. See the package's registry page for more details.

upgini/autofe/date.py CHANGED
@@ -2,6 +2,7 @@ from typing import Any, Optional, Union
2
2
  import numpy as np
3
3
  import pandas as pd
4
4
  from pydantic import BaseModel
5
+ from pandas.core.arrays.timedeltas import TimedeltaArray
5
6
 
6
7
  from upgini.autofe.operand import PandasOperand
7
8
 
@@ -46,6 +47,7 @@ class DateDiffType2(PandasOperand, DateDiffMixin):
46
47
  future = right + (left.dt.year - right.dt.year).apply(
47
48
  lambda y: np.datetime64("NaT") if np.isnan(y) else pd.tseries.offsets.DateOffset(years=y)
48
49
  )
50
+ future = pd.to_datetime(future)
49
51
  before = future[future < left]
50
52
  future[future < left] = before + pd.tseries.offsets.DateOffset(years=1)
51
53
  diff = (future - left) / np.timedelta64(1, self.diff_unit)
@@ -72,8 +74,13 @@ class DateListDiff(PandasOperand, DateDiffMixin):
72
74
 
73
75
  return pd.Series(left - right.values).apply(lambda x: self._agg(self._diff(x)))
74
76
 
75
- def _diff(self, x):
76
- x = x / np.timedelta64(1, self.diff_unit)
77
+ def _diff(self, x: TimedeltaArray):
78
+ if self.diff_unit == "Y":
79
+ x = (x / 365 / 24 / 60 / 60 / 10**9).astype(int)
80
+ elif self.diff_unit == "M":
81
+ raise Exception("Unsupported difference unit: Month")
82
+ else:
83
+ x = x / np.timedelta64(1, self.diff_unit)
77
84
  return x[x > 0]
78
85
 
79
86
  def _agg(self, x):
upgini/data_source/data_source_publisher.py CHANGED
@@ -48,6 +48,7 @@ class DataSourcePublisher:
48
48
  data_table_uri: str,
49
49
  search_keys: Dict[str, SearchKey],
50
50
  update_frequency: str,
51
+ exclude_from_autofe_generation: Optional[List[str]],
51
52
  secondary_search_keys: Optional[Dict[str, SearchKey]] = None,
52
53
  sort_column: Optional[str] = None,
53
54
  date_format: Optional[str] = None,
@@ -57,7 +58,6 @@ class DataSourcePublisher:
57
58
  join_date_abs_limit_days: Optional[int] = None,
58
59
  features_for_embeddings: Optional[List[str]] = DEFAULT_GENERATE_EMBEDDINGS,
59
60
  data_table_id_to_replace: Optional[str] = None,
60
- exclude_from_autofe_generation: Optional[List[str]] = None,
61
61
  _force_generation=False,
62
62
  _silent=False,
63
63
  ) -> str:
upgini/dataset.py CHANGED
@@ -22,9 +22,7 @@ from pandas.api.types import (
22
22
  from upgini.errors import ValidationError
23
23
  from upgini.http import ProgressStage, SearchProgress, _RestClient
24
24
  from upgini.metadata import (
25
- ENTITY_SYSTEM_RECORD_ID,
26
25
  EVAL_SET_INDEX,
27
- SEARCH_KEY_UNNEST,
28
26
  SYSTEM_COLUMNS,
29
27
  SYSTEM_RECORD_ID,
30
28
  TARGET,
@@ -80,7 +78,6 @@ class Dataset: # (pd.DataFrame):
80
78
  path: Optional[str] = None,
81
79
  meaning_types: Optional[Dict[str, FileColumnMeaningType]] = None,
82
80
  search_keys: Optional[List[Tuple[str, ...]]] = None,
83
- unnest_search_keys: Optional[List[str]] = None,
84
81
  model_task_type: Optional[ModelTaskType] = None,
85
82
  random_state: Optional[int] = None,
86
83
  rest_client: Optional[_RestClient] = None,
@@ -115,7 +112,6 @@ class Dataset: # (pd.DataFrame):
115
112
  self.description = description
116
113
  self.meaning_types = meaning_types
117
114
  self.search_keys = search_keys
118
- self.unnest_search_keys = unnest_search_keys
119
115
  self.ignore_columns = []
120
116
  self.hierarchical_group_keys = []
121
117
  self.hierarchical_subgroup_keys = []
@@ -175,7 +171,7 @@ class Dataset: # (pd.DataFrame):
175
171
  new_columns = []
176
172
  dup_counter = 0
177
173
  for column in self.data.columns:
178
- if column in [TARGET, EVAL_SET_INDEX, SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID, SEARCH_KEY_UNNEST]:
174
+ if column in [TARGET, EVAL_SET_INDEX, SYSTEM_RECORD_ID]:
179
175
  self.columns_renaming[column] = column
180
176
  new_columns.append(column)
181
177
  continue
@@ -356,9 +352,7 @@ class Dataset: # (pd.DataFrame):
356
352
 
357
353
  if is_string_dtype(self.data[postal_code]):
358
354
  try:
359
- self.data[postal_code] = (
360
- self.data[postal_code].astype("string").astype("Float64").astype("Int64").astype("string")
361
- )
355
+ self.data[postal_code] = self.data[postal_code].astype("float64").astype("Int64").astype("string")
362
356
  except Exception:
363
357
  pass
364
358
  elif is_float_dtype(self.data[postal_code]):
@@ -808,8 +802,6 @@ class Dataset: # (pd.DataFrame):
808
802
  meaningType=meaning_type,
809
803
  minMaxValues=min_max_values,
810
804
  )
811
- if self.unnest_search_keys and column_meta.originalName in self.unnest_search_keys:
812
- column_meta.isUnnest = True
813
805
 
814
806
  columns.append(column_meta)
815
807
 
upgini/features_enricher.py CHANGED
@@ -1,4 +1,5 @@
1
1
  import dataclasses
2
+ import datetime
2
3
  import gc
3
4
  import hashlib
4
5
  import itertools
@@ -10,7 +11,6 @@ import sys
10
11
  import tempfile
11
12
  import time
12
13
  import uuid
13
- from collections import Counter
14
14
  from dataclasses import dataclass
15
15
  from threading import Thread
16
16
  from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
@@ -44,11 +44,9 @@ from upgini.mdc import MDC
44
44
  from upgini.metadata import (
45
45
  COUNTRY,
46
46
  DEFAULT_INDEX,
47
- ENTITY_SYSTEM_RECORD_ID,
48
47
  EVAL_SET_INDEX,
49
48
  ORIGINAL_INDEX,
50
49
  RENAMED_INDEX,
51
- SEARCH_KEY_UNNEST,
52
50
  SORT_ID,
53
51
  SYSTEM_RECORD_ID,
54
52
  TARGET,
@@ -149,6 +147,7 @@ class FeaturesEnricher(TransformerMixin):
149
147
  """
150
148
 
151
149
  TARGET_NAME = "target"
150
+ CURRENT_DATE = "current_date"
152
151
  RANDOM_STATE = 42
153
152
  CALCULATE_METRICS_THRESHOLD = 50_000_000
154
153
  CALCULATE_METRICS_MIN_THRESHOLD = 500
@@ -210,6 +209,7 @@ class FeaturesEnricher(TransformerMixin):
210
209
  client_ip: Optional[str] = None,
211
210
  client_visitorid: Optional[str] = None,
212
211
  custom_bundle_config: Optional[str] = None,
212
+ add_date_if_missing: bool = True,
213
213
  **kwargs,
214
214
  ):
215
215
  self.bundle = get_custom_bundle(custom_bundle_config)
@@ -320,6 +320,7 @@ class FeaturesEnricher(TransformerMixin):
320
320
  self.raise_validation_error = raise_validation_error
321
321
  self.exclude_columns = exclude_columns
322
322
  self.baseline_score_column = baseline_score_column
323
+ self.add_date_if_missing = add_date_if_missing
323
324
 
324
325
  def _get_api_key(self):
325
326
  return self._api_key
@@ -423,6 +424,9 @@ class FeaturesEnricher(TransformerMixin):
423
424
 
424
425
  self.__validate_search_keys(self.search_keys, self.search_id)
425
426
 
427
+ # Validate client estimator params
428
+ self._get_client_cat_features(estimator, X, self.search_keys)
429
+
426
430
  try:
427
431
  self.X = X
428
432
  self.y = y
@@ -816,6 +820,7 @@ class FeaturesEnricher(TransformerMixin):
816
820
  trace_id = trace_id or str(uuid.uuid4())
817
821
  start_time = time.time()
818
822
  with MDC(trace_id=trace_id):
823
+ self.logger.info("Start calculate metrics")
819
824
  if len(args) > 0:
820
825
  msg = f"WARNING: Unsupported positional arguments for calculate_metrics: {args}"
821
826
  self.logger.warning(msg)
@@ -867,22 +872,9 @@ class FeaturesEnricher(TransformerMixin):
867
872
  self.__display_support_link(msg)
868
873
  return None
869
874
 
870
- cat_features = None
871
- search_keys_for_metrics = []
872
- if (
873
- estimator is not None
874
- and hasattr(estimator, "get_param")
875
- and estimator.get_param("cat_features") is not None
876
- ):
877
- cat_features = estimator.get_param("cat_features")
878
- if len(cat_features) > 0 and isinstance(cat_features[0], int):
879
- cat_features = [effective_X.columns[i] for i in cat_features]
880
- for cat_feature in cat_features:
881
- if cat_feature in self.search_keys:
882
- if self.search_keys[cat_feature] in [SearchKey.COUNTRY, SearchKey.POSTAL_CODE]:
883
- search_keys_for_metrics.append(cat_feature)
884
- else:
885
- raise ValidationError(self.bundle.get("cat_feature_search_key").format(cat_feature))
875
+ cat_features, search_keys_for_metrics = self._get_client_cat_features(
876
+ estimator, effective_X, self.search_keys
877
+ )
886
878
 
887
879
  prepared_data = self._prepare_data_for_metrics(
888
880
  trace_id=trace_id,
@@ -897,6 +889,7 @@ class FeaturesEnricher(TransformerMixin):
897
889
  search_keys_for_metrics=search_keys_for_metrics,
898
890
  progress_bar=progress_bar,
899
891
  progress_callback=progress_callback,
892
+ cat_features=cat_features,
900
893
  )
901
894
  if prepared_data is None:
902
895
  return None
@@ -1184,8 +1177,6 @@ class FeaturesEnricher(TransformerMixin):
1184
1177
  search_keys = self.search_keys.copy()
1185
1178
  search_keys = self.__prepare_search_keys(x, search_keys, is_demo_dataset, is_transform=True, silent_mode=True)
1186
1179
 
1187
- unnest_search_keys = []
1188
-
1189
1180
  extended_X = x.copy()
1190
1181
  generated_features = []
1191
1182
  date_column = self._get_date_column(search_keys)
@@ -1196,7 +1187,7 @@ class FeaturesEnricher(TransformerMixin):
1196
1187
  email_column = self._get_email_column(search_keys)
1197
1188
  hem_column = self._get_hem_column(search_keys)
1198
1189
  if email_column:
1199
- converter = EmailSearchKeyConverter(email_column, hem_column, search_keys, unnest_search_keys, self.logger)
1190
+ converter = EmailSearchKeyConverter(email_column, hem_column, search_keys, self.logger)
1200
1191
  extended_X = converter.convert(extended_X)
1201
1192
  generated_features.extend(converter.generated_features)
1202
1193
  if (
@@ -1274,6 +1265,29 @@ class FeaturesEnricher(TransformerMixin):
1274
1265
 
1275
1266
  return _cv, groups
1276
1267
 
1268
+ def _get_client_cat_features(
1269
+ self, estimator: Optional[Any], X: pd.DataFrame, search_keys: Dict[str, SearchKey]
1270
+ ) -> Optional[List[str]]:
1271
+ cat_features = None
1272
+ search_keys_for_metrics = []
1273
+ if (
1274
+ estimator is not None
1275
+ and hasattr(estimator, "get_param")
1276
+ and estimator.get_param("cat_features") is not None
1277
+ ):
1278
+ cat_features = estimator.get_param("cat_features")
1279
+ if len(cat_features) > 0:
1280
+ if all([isinstance(f, int) for f in cat_features]):
1281
+ cat_features = [X.columns[i] for i in cat_features]
1282
+ self.logger.info(f"Collected categorical features {cat_features} from user estimator")
1283
+ for cat_feature in cat_features:
1284
+ if cat_feature in search_keys:
1285
+ if search_keys[cat_feature] in [SearchKey.COUNTRY, SearchKey.POSTAL_CODE]:
1286
+ search_keys_for_metrics.append(cat_feature)
1287
+ else:
1288
+ raise ValidationError(self.bundle.get("cat_feature_search_key").format(cat_feature))
1289
+ return cat_features, search_keys_for_metrics
1290
+
1277
1291
  def _prepare_data_for_metrics(
1278
1292
  self,
1279
1293
  trace_id: str,
@@ -1288,6 +1302,7 @@ class FeaturesEnricher(TransformerMixin):
1288
1302
  search_keys_for_metrics: Optional[List[str]] = None,
1289
1303
  progress_bar: Optional[ProgressBar] = None,
1290
1304
  progress_callback: Optional[Callable[[SearchProgress], Any]] = None,
1305
+ cat_features: Optional[List[str]] = None,
1291
1306
  ):
1292
1307
  is_input_same_as_fit, X, y, eval_set = self._is_input_same_as_fit(X, y, eval_set)
1293
1308
  is_demo_dataset = hash_input(X, y, eval_set) in DEMO_DATASET_HASHES
@@ -1345,9 +1360,8 @@ class FeaturesEnricher(TransformerMixin):
1345
1360
 
1346
1361
  # Detect and drop high cardinality columns in train
1347
1362
  columns_with_high_cardinality = FeaturesValidator.find_high_cardinality(fitting_X)
1348
- columns_with_high_cardinality = [
1349
- c for c in columns_with_high_cardinality if c not in (self.generate_features or [])
1350
- ]
1363
+ non_excluding_columns = (self.generate_features or []) + (cat_features or [])
1364
+ columns_with_high_cardinality = [c for c in columns_with_high_cardinality if c not in non_excluding_columns]
1351
1365
  if len(columns_with_high_cardinality) > 0:
1352
1366
  self.logger.warning(
1353
1367
  f"High cardinality columns {columns_with_high_cardinality} will be dropped for metrics calculation"
@@ -1809,10 +1823,11 @@ class FeaturesEnricher(TransformerMixin):
1809
1823
  else:
1810
1824
  features_section = ""
1811
1825
 
1812
- api_example = f"""curl 'https://inference-upgini.azurewebsites.net/api/http_inference_trigger' \\
1826
+ search_id = self._search_task.search_task_id
1827
+ api_example = f"""curl 'https://search.upgini.com/online/api/http_inference_trigger?search_id={search_id}' \\
1813
1828
  -H 'Authorization: {self.api_key}' \\
1814
1829
  -H 'Content-Type: application/json' \\
1815
- -d '{{"search_id": "{self._search_task.search_task_id}", "search_keys": {keys}{features_section}}}'"""
1830
+ -d '{{"search_keys": {keys}{features_section}}}'"""
1816
1831
  return api_example
1817
1832
 
1818
1833
  def _get_copy_of_runtime_parameters(self) -> RuntimeParameters:
@@ -1907,38 +1922,13 @@ class FeaturesEnricher(TransformerMixin):
1907
1922
  generated_features.extend(converter.generated_features)
1908
1923
  else:
1909
1924
  self.logger.info("Input dataset hasn't date column")
1910
-
1911
- # Don't pass all features in backend on transform
1912
- original_features_for_transform = []
1913
- runtime_parameters = self._get_copy_of_runtime_parameters()
1914
- features_not_to_pass = [column for column in df.columns if column not in search_keys.keys()]
1915
- if len(features_not_to_pass) > 0:
1916
- # Pass only features that need for transform
1917
- features_for_transform = self._search_task.get_features_for_transform()
1918
- if features_for_transform is not None and len(features_for_transform) > 0:
1919
- file_metadata = self._search_task.get_file_metadata(trace_id)
1920
- original_features_for_transform = [
1921
- c.originalName or c.name for c in file_metadata.columns if c.name in features_for_transform
1922
- ]
1923
-
1924
- runtime_parameters.properties["features_for_embeddings"] = ",".join(features_for_transform)
1925
-
1926
- columns_for_system_record_id = sorted(list(search_keys.keys()) + (original_features_for_transform))
1927
-
1928
- df[ENTITY_SYSTEM_RECORD_ID] = pd.util.hash_pandas_object(
1929
- df[columns_for_system_record_id], index=False
1930
- ).astype("Float64")
1931
-
1932
- # Explode multiple search keys
1933
- df, unnest_search_keys = self._explode_multiple_search_keys(df, search_keys)
1934
-
1925
+ if self.add_date_if_missing:
1926
+ df = self._add_current_date_as_key(df, search_keys, self.logger, self.bundle)
1935
1927
  email_column = self._get_email_column(search_keys)
1936
1928
  hem_column = self._get_hem_column(search_keys)
1937
1929
  email_converted_to_hem = False
1938
1930
  if email_column:
1939
- converter = EmailSearchKeyConverter(
1940
- email_column, hem_column, search_keys, unnest_search_keys, self.logger
1941
- )
1931
+ converter = EmailSearchKeyConverter(email_column, hem_column, search_keys, self.logger)
1942
1932
  df = converter.convert(df)
1943
1933
  generated_features.extend(converter.generated_features)
1944
1934
  email_converted_to_hem = converter.email_converted_to_hem
@@ -1952,21 +1942,30 @@ class FeaturesEnricher(TransformerMixin):
1952
1942
  generated_features = [f for f in generated_features if f in self.fit_generated_features]
1953
1943
 
1954
1944
  meaning_types = {col: key.value for col, key in search_keys.items()}
1955
- # non_keys_columns = [column for column in df.columns if column not in search_keys.keys()]
1956
- for col in original_features_for_transform:
1957
- meaning_types[col] = FileColumnMeaningType.FEATURE
1958
- features_not_to_pass = [column for column in features_not_to_pass if column not in search_keys.keys()]
1945
+ non_keys_columns = [column for column in df.columns if column not in search_keys.keys()]
1959
1946
 
1960
1947
  if email_converted_to_hem:
1961
- features_not_to_pass.append(email_column)
1948
+ non_keys_columns.append(email_column)
1949
+
1950
+ # Don't pass features in backend on transform
1951
+ original_features_for_transform = None
1952
+ runtime_parameters = self._get_copy_of_runtime_parameters()
1953
+ if len(non_keys_columns) > 0:
1954
+ # Pass only features that need for transform
1955
+ features_for_transform = self._search_task.get_features_for_transform()
1956
+ if features_for_transform is not None and len(features_for_transform) > 0:
1957
+ file_metadata = self._search_task.get_file_metadata(trace_id)
1958
+ original_features_for_transform = [
1959
+ c.originalName or c.name for c in file_metadata.columns if c.name in features_for_transform
1960
+ ]
1961
+ non_keys_columns = [c for c in non_keys_columns if c not in original_features_for_transform]
1962
1962
 
1963
- features_not_to_pass = [c for c in features_not_to_pass if c not in original_features_for_transform]
1964
- columns_for_system_record_id = sorted(list(search_keys.keys()) + (original_features_for_transform))
1963
+ runtime_parameters.properties["features_for_embeddings"] = ",".join(features_for_transform)
1965
1964
 
1966
1965
  if add_fit_system_record_id:
1967
1966
  df = self.__add_fit_system_record_id(df, dict(), search_keys)
1968
1967
  df = df.rename(columns={SYSTEM_RECORD_ID: SORT_ID})
1969
- features_not_to_pass.append(SORT_ID)
1968
+ non_keys_columns.append(SORT_ID)
1970
1969
 
1971
1970
  columns_for_system_record_id = sorted(list(search_keys.keys()) + (original_features_for_transform or []))
1972
1971
 
@@ -1974,19 +1973,16 @@ class FeaturesEnricher(TransformerMixin):
1974
1973
  "Float64"
1975
1974
  )
1976
1975
  meaning_types[SYSTEM_RECORD_ID] = FileColumnMeaningType.SYSTEM_RECORD_ID
1977
- meaning_types[ENTITY_SYSTEM_RECORD_ID] = FileColumnMeaningType.ENTITY_SYSTEM_RECORD_ID
1978
- if SEARCH_KEY_UNNEST in df.columns:
1979
- meaning_types[SEARCH_KEY_UNNEST] = FileColumnMeaningType.UNNEST_KEY
1980
1976
 
1981
1977
  df = df.reset_index(drop=True)
1982
- system_columns_with_original_index = [SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID] + generated_features
1978
+ system_columns_with_original_index = [SYSTEM_RECORD_ID] + generated_features
1983
1979
  if add_fit_system_record_id:
1984
1980
  system_columns_with_original_index.append(SORT_ID)
1985
1981
  df_with_original_index = df[system_columns_with_original_index].copy()
1986
1982
 
1987
1983
  combined_search_keys = combine_search_keys(search_keys.keys())
1988
1984
 
1989
- df_without_features = df.drop(columns=features_not_to_pass)
1985
+ df_without_features = df.drop(columns=non_keys_columns)
1990
1986
 
1991
1987
  df_without_features = clean_full_duplicates(
1992
1988
  df_without_features, self.logger, silent=silent_mode, bundle=self.bundle
@@ -2142,14 +2138,6 @@ class FeaturesEnricher(TransformerMixin):
2142
2138
 
2143
2139
  key_types = search_keys.values()
2144
2140
 
2145
- # Multiple search keys allowed only for PHONE, IP, POSTAL_CODE, EMAIL, HEM
2146
- multi_keys = [key for key, count in Counter(key_types).items() if count > 1]
2147
- for multi_key in multi_keys:
2148
- if multi_key not in [SearchKey.PHONE, SearchKey.IP, SearchKey.POSTAL_CODE, SearchKey.EMAIL, SearchKey.HEM]:
2149
- msg = self.bundle.get("unsupported_multi_key").format(multi_key)
2150
- self.logger.warning(msg)
2151
- raise ValidationError(msg)
2152
-
2153
2141
  if SearchKey.DATE in key_types and SearchKey.DATETIME in key_types:
2154
2142
  msg = self.bundle.get("date_and_datetime_simultanious")
2155
2143
  self.logger.warning(msg)
@@ -2165,11 +2153,11 @@ class FeaturesEnricher(TransformerMixin):
2165
2153
  self.logger.warning(msg)
2166
2154
  raise ValidationError(msg)
2167
2155
 
2168
- # for key_type in SearchKey.__members__.values():
2169
- # if key_type != SearchKey.CUSTOM_KEY and list(key_types).count(key_type) > 1:
2170
- # msg = self.bundle.get("multiple_search_key").format(key_type)
2171
- # self.logger.warning(msg)
2172
- # raise ValidationError(msg)
2156
+ for key_type in SearchKey.__members__.values():
2157
+ if key_type != SearchKey.CUSTOM_KEY and list(key_types).count(key_type) > 1:
2158
+ msg = self.bundle.get("multiple_search_key").format(key_type)
2159
+ self.logger.warning(msg)
2160
+ raise ValidationError(msg)
2173
2161
 
2174
2162
  # non_personal_keys = set(SearchKey.__members__.values()) - set(SearchKey.personal_keys())
2175
2163
  # if (
@@ -2305,7 +2293,16 @@ class FeaturesEnricher(TransformerMixin):
2305
2293
  self.fit_generated_features.extend(converter.generated_features)
2306
2294
  else:
2307
2295
  self.logger.info("Input dataset hasn't date column")
2308
-
2296
+ if self.add_date_if_missing:
2297
+ df = self._add_current_date_as_key(df, self.fit_search_keys, self.logger, self.bundle)
2298
+ email_column = self._get_email_column(self.fit_search_keys)
2299
+ hem_column = self._get_hem_column(self.fit_search_keys)
2300
+ email_converted_to_hem = False
2301
+ if email_column:
2302
+ converter = EmailSearchKeyConverter(email_column, hem_column, self.fit_search_keys, self.logger)
2303
+ df = converter.convert(df)
2304
+ self.fit_generated_features.extend(converter.generated_features)
2305
+ email_converted_to_hem = converter.email_converted_to_hem
2309
2306
  if (
2310
2307
  self.detect_missing_search_keys
2311
2308
  and list(self.fit_search_keys.values()) == [SearchKey.DATE]
@@ -2314,37 +2311,7 @@ class FeaturesEnricher(TransformerMixin):
2314
2311
  converter = IpToCountrySearchKeyConverter(self.fit_search_keys, self.logger)
2315
2312
  df = converter.convert(df)
2316
2313
 
2317
- # Explode multiple search keys
2318
2314
  non_feature_columns = [self.TARGET_NAME, EVAL_SET_INDEX] + list(self.fit_search_keys.keys())
2319
- meaning_types = {
2320
- **{col: key.value for col, key in self.fit_search_keys.items()},
2321
- **{str(c): FileColumnMeaningType.FEATURE for c in df.columns if c not in non_feature_columns},
2322
- }
2323
- meaning_types[self.TARGET_NAME] = FileColumnMeaningType.TARGET
2324
- if eval_set is not None and len(eval_set) > 0:
2325
- meaning_types[EVAL_SET_INDEX] = FileColumnMeaningType.EVAL_SET_INDEX
2326
- df = self.__add_fit_system_record_id(df, meaning_types, self.fit_search_keys, ENTITY_SYSTEM_RECORD_ID)
2327
-
2328
- # TODO check that this is correct for enrichment
2329
- self.df_with_original_index = df.copy()
2330
-
2331
- df, unnest_search_keys = self._explode_multiple_search_keys(df, self.fit_search_keys)
2332
-
2333
- # Convert EMAIL to HEM after unnesting to do it only with one column
2334
- email_column = self._get_email_column(self.fit_search_keys)
2335
- hem_column = self._get_hem_column(self.fit_search_keys)
2336
- email_converted_to_hem = False
2337
- if email_column:
2338
- converter = EmailSearchKeyConverter(
2339
- email_column, hem_column, self.fit_search_keys, unnest_search_keys, self.logger
2340
- )
2341
- df = converter.convert(df)
2342
- self.fit_generated_features.extend(converter.generated_features)
2343
- email_converted_to_hem = converter.email_converted_to_hem
2344
-
2345
- non_feature_columns = [self.TARGET_NAME, EVAL_SET_INDEX, ENTITY_SYSTEM_RECORD_ID, SEARCH_KEY_UNNEST] + list(
2346
- self.fit_search_keys.keys()
2347
- )
2348
2315
  if email_converted_to_hem:
2349
2316
  non_feature_columns.append(email_column)
2350
2317
  if DateTimeSearchKeyConverter.DATETIME_COL in df.columns:
@@ -2368,14 +2335,12 @@ class FeaturesEnricher(TransformerMixin):
2368
2335
  **{str(c): FileColumnMeaningType.FEATURE for c in df.columns if c not in non_feature_columns},
2369
2336
  }
2370
2337
  meaning_types[self.TARGET_NAME] = FileColumnMeaningType.TARGET
2371
- meaning_types[ENTITY_SYSTEM_RECORD_ID] = FileColumnMeaningType.ENTITY_SYSTEM_RECORD_ID
2372
- if SEARCH_KEY_UNNEST in df.columns:
2373
- meaning_types[SEARCH_KEY_UNNEST] = FileColumnMeaningType.UNNEST_KEY
2374
2338
  if eval_set is not None and len(eval_set) > 0:
2375
2339
  meaning_types[EVAL_SET_INDEX] = FileColumnMeaningType.EVAL_SET_INDEX
2376
2340
 
2377
- df = self.__add_fit_system_record_id(df, meaning_types, self.fit_search_keys, SYSTEM_RECORD_ID)
2341
+ df = self.__add_fit_system_record_id(df, meaning_types, self.fit_search_keys)
2378
2342
 
2343
+ self.df_with_original_index = df.copy()
2379
2344
  df = df.reset_index(drop=True).sort_values(by=SYSTEM_RECORD_ID).reset_index(drop=True)
2380
2345
 
2381
2346
  combined_search_keys = combine_search_keys(self.fit_search_keys.keys())
@@ -2383,15 +2348,14 @@ class FeaturesEnricher(TransformerMixin):
2383
2348
  dataset = Dataset(
2384
2349
  "tds_" + str(uuid.uuid4()),
2385
2350
  df=df,
2386
- meaning_types=meaning_types,
2387
- search_keys=combined_search_keys,
2388
- unnest_search_keys=unnest_search_keys,
2389
2351
  model_task_type=model_task_type,
2390
2352
  date_format=self.date_format,
2391
2353
  random_state=self.random_state,
2392
2354
  rest_client=self.rest_client,
2393
2355
  logger=self.logger,
2394
2356
  )
2357
+ dataset.meaning_types = meaning_types
2358
+ dataset.search_keys = combined_search_keys
2395
2359
  if email_converted_to_hem:
2396
2360
  dataset.ignore_columns = [email_column]
2397
2361
 
@@ -2911,6 +2875,25 @@ class FeaturesEnricher(TransformerMixin):
2911
2875
  if t in [SearchKey.DATE, SearchKey.DATETIME]:
2912
2876
  return col
2913
2877
 
2878
+ @staticmethod
2879
+ def _add_current_date_as_key(
2880
+ df: pd.DataFrame, search_keys: Dict[str, SearchKey], logger: logging.Logger, bundle: ResourceBundle
2881
+ ) -> pd.DataFrame:
2882
+ if (
2883
+ set(search_keys.values()) == {SearchKey.PHONE}
2884
+ or set(search_keys.values()) == {SearchKey.EMAIL}
2885
+ or set(search_keys.values()) == {SearchKey.HEM}
2886
+ or set(search_keys.values()) == {SearchKey.COUNTRY, SearchKey.POSTAL_CODE}
2887
+ ):
2888
+ msg = bundle.get("current_date_added")
2889
+ print(msg)
2890
+ logger.warning(msg)
2891
+ df[FeaturesEnricher.CURRENT_DATE] = datetime.date.today()
2892
+ search_keys[FeaturesEnricher.CURRENT_DATE] = SearchKey.DATE
2893
+ converter = DateTimeSearchKeyConverter(FeaturesEnricher.CURRENT_DATE, None, logger, bundle)
2894
+ df = converter.convert(df)
2895
+ return df
2896
+
2914
2897
  @staticmethod
2915
2898
  def _get_group_columns(df: pd.DataFrame, search_keys: Dict[str, SearchKey]) -> List[str]:
2916
2899
  return [
@@ -2921,19 +2904,15 @@ class FeaturesEnricher(TransformerMixin):
2921
2904
 
2922
2905
  @staticmethod
2923
2906
  def _get_email_column(search_keys: Dict[str, SearchKey]) -> Optional[str]:
2924
- cols = [col for col, t in search_keys.items() if t == SearchKey.EMAIL]
2925
- if len(cols) > 1:
2926
- raise Exception("More than one email column found after unnest")
2927
- if len(cols) == 1:
2928
- return cols[0]
2907
+ for col, t in search_keys.items():
2908
+ if t == SearchKey.EMAIL:
2909
+ return col
2929
2910
 
2930
2911
  @staticmethod
2931
2912
  def _get_hem_column(search_keys: Dict[str, SearchKey]) -> Optional[str]:
2932
- cols = [col for col, t in search_keys.items() if t == SearchKey.HEM]
2933
- if len(cols) > 1:
2934
- raise Exception("More than one hem column found after unnest")
2935
- if len(cols) == 1:
2936
- return cols[0]
2913
+ for col, t in search_keys.items():
2914
+ if t == SearchKey.HEM:
2915
+ return col
2937
2916
 
2938
2917
  @staticmethod
2939
2918
  def _get_phone_column(search_keys: Dict[str, SearchKey]) -> Optional[str]:
@@ -2941,42 +2920,8 @@ class FeaturesEnricher(TransformerMixin):
2941
2920
  if t == SearchKey.PHONE:
2942
2921
  return col
2943
2922
 
2944
- def _explode_multiple_search_keys(self, df: pd.DataFrame, search_keys: Dict[str, SearchKey]) -> pd.DataFrame:
2945
- # find groups of multiple search keys
2946
- search_key_names_by_type: Dict[SearchKey, str] = dict()
2947
- for key_name, key_type in search_keys.items():
2948
- search_key_names_by_type[key_type] = search_key_names_by_type.get(key_type, []) + [key_name]
2949
- search_key_names_by_type = {
2950
- key_type: key_names for key_type, key_names in search_key_names_by_type.items() if len(key_names) > 1
2951
- }
2952
- if len(search_key_names_by_type) == 0:
2953
- return df, []
2954
-
2955
- multiple_keys_columns = [col for cols in search_key_names_by_type.values() for col in cols]
2956
- other_columns = [col for col in df.columns if col not in multiple_keys_columns]
2957
- exploded_dfs = []
2958
- unnest_search_keys = []
2959
-
2960
- for key_type, key_names in search_key_names_by_type.items():
2961
- new_search_key = f"upgini_{key_type.name.lower()}_unnest"
2962
- exploded_df = pd.melt(
2963
- df, id_vars=other_columns, value_vars=key_names, var_name=SEARCH_KEY_UNNEST, value_name=new_search_key
2964
- )
2965
- exploded_dfs.append(exploded_df)
2966
- for old_key in key_names:
2967
- del search_keys[old_key]
2968
- search_keys[new_search_key] = key_type
2969
- unnest_search_keys.append(new_search_key)
2970
-
2971
- df = pd.concat(exploded_dfs, ignore_index=True)
2972
- return df, unnest_search_keys
2973
-
2974
2923
  def __add_fit_system_record_id(
2975
- self,
2976
- df: pd.DataFrame,
2977
- meaning_types: Dict[str, FileColumnMeaningType],
2978
- search_keys: Dict[str, SearchKey],
2979
- id_name: str,
2924
+ self, df: pd.DataFrame, meaning_types: Dict[str, FileColumnMeaningType], search_keys: Dict[str, SearchKey]
2980
2925
  ) -> pd.DataFrame:
2981
2926
  # save original order or rows
2982
2927
  original_index_name = df.index.name
@@ -3025,18 +2970,14 @@ class FeaturesEnricher(TransformerMixin):
3025
2970
 
3026
2971
  df = df.reset_index(drop=True).reset_index()
3027
2972
  # system_record_id saves correct order for fit
3028
- df = df.rename(columns={DEFAULT_INDEX: id_name})
2973
+ df = df.rename(columns={DEFAULT_INDEX: SYSTEM_RECORD_ID})
3029
2974
 
3030
2975
  # return original order
3031
2976
  df = df.set_index(ORIGINAL_INDEX)
3032
2977
  df.index.name = original_index_name
3033
2978
  df = df.sort_values(by=original_order_name).drop(columns=original_order_name)
3034
2979
 
3035
- meaning_types[id_name] = (
3036
- FileColumnMeaningType.SYSTEM_RECORD_ID
3037
- if id_name == SYSTEM_RECORD_ID
3038
- else FileColumnMeaningType.ENTITY_SYSTEM_RECORD_ID
3039
- )
2980
+ meaning_types[SYSTEM_RECORD_ID] = FileColumnMeaningType.SYSTEM_RECORD_ID
3040
2981
  return df
3041
2982
 
3042
2983
  def __correct_target(self, df: pd.DataFrame) -> pd.DataFrame:
@@ -3091,10 +3032,7 @@ class FeaturesEnricher(TransformerMixin):
3091
3032
  )
3092
3033
 
3093
3034
  comparing_columns = X.columns if is_transform else df_with_original_index.columns
3094
- dup_features = [
3095
- c for c in comparing_columns
3096
- if c in result_features.columns and c not in [SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID]
3097
- ]
3035
+ dup_features = [c for c in comparing_columns if c in result_features.columns and c != SYSTEM_RECORD_ID]
3098
3036
  if len(dup_features) > 0:
3099
3037
  self.logger.warning(f"X contain columns with same name as returned from backend: {dup_features}")
3100
3038
  raise ValidationError(self.bundle.get("returned_features_same_as_passed").format(dup_features))
@@ -3105,7 +3043,8 @@ class FeaturesEnricher(TransformerMixin):
3105
3043
  result_features = pd.merge(
3106
3044
  df_with_original_index,
3107
3045
  result_features,
3108
- on=ENTITY_SYSTEM_RECORD_ID,
3046
+ left_on=SYSTEM_RECORD_ID,
3047
+ right_on=SYSTEM_RECORD_ID,
3109
3048
  how="left" if is_transform else "inner",
3110
3049
  )
3111
3050
  result_features = result_features.set_index(original_index_name or DEFAULT_INDEX)
@@ -3485,13 +3424,13 @@ class FeaturesEnricher(TransformerMixin):
3485
3424
  self.warning_counter.increment()
3486
3425
 
3487
3426
  if len(valid_search_keys) == 1:
3488
- key, value = list(valid_search_keys.items())[0]
3489
- # Show warning for country only if country is the only key
3490
- if x[key].nunique() == 1:
3491
- msg = self.bundle.get("single_constant_search_key").format(value, x[key].values[0])
3492
- print(msg)
3493
- self.logger.warning(msg)
3494
- self.warning_counter.increment()
3427
+ for k, v in valid_search_keys.items():
3428
+ # Show warning for country only if country is the only key
3429
+ if x[k].nunique() == 1 and (v != SearchKey.COUNTRY or len(valid_search_keys) == 1):
3430
+ msg = self.bundle.get("single_constant_search_key").format(v, x[k].values[0])
3431
+ print(msg)
3432
+ self.logger.warning(msg)
3433
+ self.warning_counter.increment()
3495
3434
 
3496
3435
  self.logger.info(f"Prepared search keys: {valid_search_keys}")
3497
3436
 
@@ -3601,68 +3540,61 @@ class FeaturesEnricher(TransformerMixin):
3601
3540
  def check_need_detect(search_key: SearchKey):
3602
3541
  return not is_transform or search_key in self.fit_search_keys.values()
3603
3542
 
3604
- # if SearchKey.POSTAL_CODE not in search_keys.values() and check_need_detect(SearchKey.POSTAL_CODE):
3605
- if check_need_detect(SearchKey.POSTAL_CODE):
3606
- maybe_keys = PostalCodeSearchKeyDetector().get_search_key_columns(sample, search_keys)
3607
- if maybe_keys:
3608
- new_keys = {key: SearchKey.POSTAL_CODE for key in maybe_keys}
3609
- search_keys.update(new_keys)
3610
- self.autodetected_search_keys.update(new_keys)
3611
- self.logger.info(f"Autodetected search key POSTAL_CODE in column {maybe_keys}")
3543
+ if SearchKey.POSTAL_CODE not in search_keys.values() and check_need_detect(SearchKey.POSTAL_CODE):
3544
+ maybe_key = PostalCodeSearchKeyDetector().get_search_key_column(sample)
3545
+ if maybe_key is not None:
3546
+ search_keys[maybe_key] = SearchKey.POSTAL_CODE
3547
+ self.autodetected_search_keys[maybe_key] = SearchKey.POSTAL_CODE
3548
+ self.logger.info(f"Autodetected search key POSTAL_CODE in column {maybe_key}")
3612
3549
  if not silent_mode:
3613
- print(self.bundle.get("postal_code_detected").format(maybe_keys))
3550
+ print(self.bundle.get("postal_code_detected").format(maybe_key))
3614
3551
 
3615
3552
  if (
3616
3553
  SearchKey.COUNTRY not in search_keys.values()
3617
3554
  and self.country_code is None
3618
3555
  and check_need_detect(SearchKey.COUNTRY)
3619
3556
  ):
3620
- maybe_key = CountrySearchKeyDetector().get_search_key_columns(sample, search_keys)
3621
- if maybe_key:
3622
- search_keys[maybe_key[0]] = SearchKey.COUNTRY
3623
- self.autodetected_search_keys[maybe_key[0]] = SearchKey.COUNTRY
3557
+ maybe_key = CountrySearchKeyDetector().get_search_key_column(sample)
3558
+ if maybe_key is not None:
3559
+ search_keys[maybe_key] = SearchKey.COUNTRY
3560
+ self.autodetected_search_keys[maybe_key] = SearchKey.COUNTRY
3624
3561
  self.logger.info(f"Autodetected search key COUNTRY in column {maybe_key}")
3625
3562
  if not silent_mode:
3626
3563
  print(self.bundle.get("country_detected").format(maybe_key))
3627
3564
 
3628
3565
  if (
3629
- # SearchKey.EMAIL not in search_keys.values()
3630
- SearchKey.HEM not in search_keys.values()
3566
+ SearchKey.EMAIL not in search_keys.values()
3567
+ and SearchKey.HEM not in search_keys.values()
3631
3568
  and check_need_detect(SearchKey.HEM)
3632
3569
  ):
3633
- maybe_keys = EmailSearchKeyDetector().get_search_key_columns(sample, search_keys)
3634
- if maybe_keys:
3570
+ maybe_key = EmailSearchKeyDetector().get_search_key_column(sample)
3571
+ if maybe_key is not None and maybe_key not in search_keys.keys():
3635
3572
  if self.__is_registered or is_demo_dataset:
3636
- new_keys = {key: SearchKey.EMAIL for key in maybe_keys}
3637
- search_keys.update(new_keys)
3638
- self.autodetected_search_keys.update(new_keys)
3639
- self.logger.info(f"Autodetected search key EMAIL in column {maybe_keys}")
3573
+ search_keys[maybe_key] = SearchKey.EMAIL
3574
+ self.autodetected_search_keys[maybe_key] = SearchKey.EMAIL
3575
+ self.logger.info(f"Autodetected search key EMAIL in column {maybe_key}")
3640
3576
  if not silent_mode:
3641
- print(self.bundle.get("email_detected").format(maybe_keys))
3577
+ print(self.bundle.get("email_detected").format(maybe_key))
3642
3578
  else:
3643
3579
  self.logger.warning(
3644
- f"Autodetected search key EMAIL in column {maybe_keys}."
3645
- " But not used because not registered user"
3580
+ f"Autodetected search key EMAIL in column {maybe_key}. But not used because not registered user"
3646
3581
  )
3647
3582
  if not silent_mode:
3648
- print(self.bundle.get("email_detected_not_registered").format(maybe_keys))
3583
+ print(self.bundle.get("email_detected_not_registered").format(maybe_key))
3649
3584
  self.warning_counter.increment()
3650
3585
 
3651
- # if SearchKey.PHONE not in search_keys.values() and check_need_detect(SearchKey.PHONE):
3652
- if check_need_detect(SearchKey.PHONE):
3653
- maybe_keys = PhoneSearchKeyDetector().get_search_key_columns(sample, search_keys)
3654
- if maybe_keys:
3586
+ if SearchKey.PHONE not in search_keys.values() and check_need_detect(SearchKey.PHONE):
3587
+ maybe_key = PhoneSearchKeyDetector().get_search_key_column(sample)
3588
+ if maybe_key is not None and maybe_key not in search_keys.keys():
3655
3589
  if self.__is_registered or is_demo_dataset:
3656
- new_keys = {key: SearchKey.PHONE for key in maybe_keys}
3657
- search_keys.update(new_keys)
3658
- self.autodetected_search_keys.update(new_keys)
3659
- self.logger.info(f"Autodetected search key PHONE in column {maybe_keys}")
3590
+ search_keys[maybe_key] = SearchKey.PHONE
3591
+ self.autodetected_search_keys[maybe_key] = SearchKey.PHONE
3592
+ self.logger.info(f"Autodetected search key PHONE in column {maybe_key}")
3660
3593
  if not silent_mode:
3661
- print(self.bundle.get("phone_detected").format(maybe_keys))
3594
+ print(self.bundle.get("phone_detected").format(maybe_key))
3662
3595
  else:
3663
3596
  self.logger.warning(
3664
- f"Autodetected search key PHONE in column {maybe_keys}. "
3665
- "But not used because not registered user"
3597
+ f"Autodetected search key PHONE in column {maybe_key}. But not used because not registered user"
3666
3598
  )
3667
3599
  if not silent_mode:
3668
3600
  print(self.bundle.get("phone_detected_not_registered"))
upgini/metadata.py CHANGED
@@ -4,8 +4,6 @@ from typing import Dict, List, Optional, Set
4
4
  from pydantic import BaseModel
5
5
 
6
6
  SYSTEM_RECORD_ID = "system_record_id"
7
- ENTITY_SYSTEM_RECORD_ID = "entity_system_record_id"
8
- SEARCH_KEY_UNNEST = "search_key_unnest"
9
7
  SORT_ID = "sort_id"
10
8
  EVAL_SET_INDEX = "eval_set_index"
11
9
  TARGET = "target"
@@ -13,7 +11,7 @@ COUNTRY = "country_iso_code"
13
11
  RENAMED_INDEX = "index_col"
14
12
  DEFAULT_INDEX = "index"
15
13
  ORIGINAL_INDEX = "original_index"
16
- SYSTEM_COLUMNS = {SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID, SEARCH_KEY_UNNEST, EVAL_SET_INDEX, TARGET, COUNTRY}
14
+ SYSTEM_COLUMNS = {SYSTEM_RECORD_ID, EVAL_SET_INDEX, TARGET, COUNTRY, SORT_ID}
17
15
 
18
16
 
19
17
  class FileColumnMeaningType(Enum):
@@ -39,8 +37,6 @@ class FileColumnMeaningType(Enum):
39
37
  POSTAL_CODE = "POSTAL_CODE"
40
38
  SYSTEM_RECORD_ID = "SYSTEM_RECORD_ID"
41
39
  EVAL_SET_INDEX = "EVAL_SET_INDEX"
42
- ENTITY_SYSTEM_RECORD_ID = "ENTITY_SYSTEM_RECORD_ID"
43
- UNNEST_KEY = "UNNEST_KEY"
44
40
 
45
41
 
46
42
  class SearchKey(Enum):
@@ -186,10 +182,6 @@ class FileColumnMetadata(BaseModel):
186
182
  meaningType: FileColumnMeaningType
187
183
  minMaxValues: Optional[NumericInterval] = None
188
184
  originalName: Optional[str]
189
- # is this column contains keys from multiple key columns like msisdn1, msisdn2
190
- isUnnest: bool = False,
191
- # list of original etalon key column names like msisdn1, msisdn2
192
- unnestKeyNames: Optional[list[str]]
193
185
 
194
186
 
195
187
  class FileMetadata(BaseModel):
upgini/metrics.py CHANGED
@@ -1,3 +1,4 @@
1
+ import inspect
1
2
  import logging
2
3
  import re
3
4
  from copy import deepcopy
@@ -381,6 +382,11 @@ class EstimatorWrapper:
381
382
  kwargs["estimator"] = estimator_copy
382
383
  if isinstance(estimator, CatBoostClassifier) or isinstance(estimator, CatBoostRegressor):
383
384
  if cat_features is not None:
385
+ for cat_feature in cat_features:
386
+ if cat_feature not in X.columns:
387
+ logger.error(
388
+ f"Client cat_feature `{cat_feature}` not found in X columns: {X.columns.to_list()}"
389
+ )
384
390
  estimator_copy.set_params(
385
391
  cat_features=[X.columns.get_loc(cat_feature) for cat_feature in cat_features]
386
392
  )
@@ -647,6 +653,12 @@ class OtherEstimatorWrapper(EstimatorWrapper):
647
653
  def validate_scoring_argument(scoring: Union[Callable, str, None]):
648
654
  if isinstance(scoring, str) and scoring is not None:
649
655
  _get_scorer_by_name(scoring)
656
+ elif isinstance(scoring, Callable):
657
+ spec = inspect.getfullargspec(scoring)
658
+ if len(spec.args) < 3:
659
+ raise ValidationError(
660
+ f"Invalid scoring function passed {scoring}. It should accept 3 input arguments: estimator, X, y"
661
+ )
650
662
 
651
663
 
652
664
  def _get_scorer_by_name(scoring: str) -> Tuple[Callable, str, int]:
@@ -1,7 +1,7 @@
1
1
  from typing import Optional
2
2
 
3
3
  import pandas as pd
4
- from pandas.api.types import is_float_dtype, is_int64_dtype, is_string_dtype
4
+ from pandas.api.types import is_float_dtype, is_int64_dtype, is_string_dtype, is_object_dtype
5
5
 
6
6
  from upgini.errors import ValidationError
7
7
 
@@ -44,7 +44,7 @@ class PhoneNormalizer:
44
44
  Method will remove all non numeric chars from string and convert it to int.
45
45
  None will be set for phone numbers that couldn"t be converted to int
46
46
  """
47
- if is_string_dtype(self.df[self.phone_column_name]):
47
+ if is_string_dtype(self.df[self.phone_column_name]) or is_object_dtype(self.df[self.phone_column_name]):
48
48
  convert_func = self.phone_str_to_int_safe
49
49
  elif is_float_dtype(self.df[self.phone_column_name]):
50
50
  convert_func = self.phone_float_to_int_safe
@@ -38,6 +38,7 @@ loss_selection_warn=\nWARNING: Loss `{0}` is not supported for feature selection
38
38
  loss_calc_metrics_warn=\nWARNING: Loss `{0}` is not supported for metrics calculation with {1}
39
39
  multivariate_timeseries_detected=\nWARNING: Multivariate TimeSeries detected. Blocked time series cross-validation split selected.\nMore details: https://github.com/upgini/upgini#-time-series-prediction-support
40
40
  group_k_fold_in_classification=\nWARNING: Using group K-fold cross-validation split for classification task.
41
+ current_date_added=\nWARNING: No date/datetime column was detected in X to be used as a search key. The current date will be used to match the latest version of data sources
41
42
 
42
43
  # Errors
43
44
  failed_search_by_task_id=Failed to retrieve the specified search results
@@ -87,7 +88,6 @@ unsupported_search_key_type=Unsupported type of key in search_keys: {}
87
88
  search_key_country_and_country_code=\nWARNING: SearchKey.COUNTRY and country_code parameter were passed simultaniously. Parameter country_code will be ignored
88
89
  empty_search_key=Search key {} is empty. Please fill values or remove this search key
89
90
  single_constant_search_key=\nWARNING: Constant value detected for the {} search key in the X dataframe: {}.\nThat search key will add constant features for different y values.\nPlease add extra search keys with non constant values, like the COUNTRY, POSTAL_CODE, DATE, PHONE NUMBER, EMAIL/HEM or IPv4
90
- unsupported_multi_key=Search key {} cannot be used multiple times
91
91
  unsupported_index_column=\nWARNING: Your column with name `index` was dropped because it's reserved name is booked for system needs.
92
92
  date_string_without_format=Date column `{}` has string type, but date_format is not specified. Convert column to datetime type or pass date_format
93
93
  invalid_date_format=Failed to parse date in column `{}`. Try to pass explicit date format in date_format argument of FeaturesEnricher constructor
@@ -159,7 +159,7 @@ dataset_invalid_multiclass_target=Unexpected dtype of target for multiclass task
159
159
  dataset_invalid_regression_target=Unexpected dtype of target for regression task type: {}. Expected float
160
160
  dataset_invalid_timeseries_target=Unexpected dtype of target for timeseries task type: {}. Expected float
161
161
  dataset_to_many_multiclass_targets=The number of target classes {} exceeds the allowed threshold: {}. Please, correct your data and try again
162
- dataset_rarest_class_less_min=Frequency of the rarest class `{}` is {}, minimum frequency must be > {} for each class\nPlease, remove rows with rarest class from your dataframe
162
+ dataset_rarest_class_less_min=Count of rows with the rarest class `{}` is {}, minimum count must be > {} for each class\nPlease, remove rows with rarest class from your dataframe
163
163
  dataset_rarest_class_less_threshold=\nWARNING: Target is imbalanced and will be undersampled to the rarest class. Frequency of the rarest class `{}` is {}\nMinimum number of observations for each class to avoid undersampling {} ({}%)
164
164
  dataset_date_features=\nWARNING: Columns {} is a datetime or period type but not used as a search key, removed from X
165
165
  dataset_too_many_features=Too many features. Maximum number of features is {}
@@ -1,4 +1,4 @@
1
- from typing import List
1
+ from typing import List, Optional
2
2
 
3
3
  import pandas as pd
4
4
 
@@ -10,18 +10,16 @@ class BaseSearchKeyDetector:
10
10
  def _is_search_key_by_values(self, column: pd.Series) -> bool:
11
11
  raise NotImplementedError()
12
12
 
13
- def _get_search_keys_by_name(self, column_names: List[str]) -> List[str]:
14
- return [
15
- column_name
16
- for column_name in column_names
17
- if self._is_search_key_by_name(column_name)
18
- ]
13
+ def _get_search_key_by_name(self, column_names: List[str]) -> Optional[str]:
14
+ for column_name in column_names:
15
+ if self._is_search_key_by_name(column_name):
16
+ return column_name
19
17
 
20
- def get_search_key_columns(self, df: pd.DataFrame, existing_search_keys: List[str]) -> List[str]:
21
- other_columns = [col for col in df.columns if col not in existing_search_keys]
22
- columns_by_names = self._get_search_keys_by_name(other_columns)
23
- columns_by_values = []
24
- for column_name in other_columns:
18
+ def get_search_key_column(self, df: pd.DataFrame) -> Optional[str]:
19
+ maybe_column = self._get_search_key_by_name(df.columns.to_list())
20
+ if maybe_column is not None:
21
+ return maybe_column
22
+
23
+ for column_name in df.columns:
25
24
  if self._is_search_key_by_values(df[column_name]):
26
- columns_by_values.append(column_name)
27
- return list(set(columns_by_names + columns_by_values))
25
+ return column_name
@@ -100,6 +100,9 @@ class DateTimeSearchKeyConverter:
100
100
  msg = self.bundle.get("unsupported_date_type").format(self.date_column)
101
101
  self.logger.warning(msg)
102
102
  raise ValidationError(msg)
103
+ else:
104
+ df[self.date_column] = df[self.date_column].astype("string").apply(self.clean_date)
105
+ df[self.date_column] = self.parse_date(df)
103
106
 
104
107
  # If column with date is datetime then extract seconds of the day and minute of the hour
105
108
  # as additional features
@@ -3,15 +3,7 @@ from typing import Dict, List, Optional, Union
3
3
 
4
4
  import pandas as pd
5
5
 
6
- from upgini.metadata import (
7
- ENTITY_SYSTEM_RECORD_ID,
8
- EVAL_SET_INDEX,
9
- SORT_ID,
10
- SYSTEM_RECORD_ID,
11
- TARGET,
12
- ModelTaskType,
13
- SearchKey,
14
- )
6
+ from upgini.metadata import EVAL_SET_INDEX, SORT_ID, SYSTEM_RECORD_ID, TARGET, ModelTaskType, SearchKey
15
7
  from upgini.resource_bundle import ResourceBundle
16
8
  from upgini.utils.datetime_utils import DateTimeSearchKeyConverter
17
9
  from upgini.utils.target_utils import define_task
@@ -151,8 +143,6 @@ def clean_full_duplicates(
151
143
  unique_columns = df.columns.tolist()
152
144
  if SYSTEM_RECORD_ID in unique_columns:
153
145
  unique_columns.remove(SYSTEM_RECORD_ID)
154
- if ENTITY_SYSTEM_RECORD_ID in unique_columns:
155
- unique_columns.remove(ENTITY_SYSTEM_RECORD_ID)
156
146
  if SORT_ID in unique_columns:
157
147
  unique_columns.remove(SORT_ID)
158
148
  if EVAL_SET_INDEX in unique_columns:
@@ -38,13 +38,11 @@ class EmailSearchKeyConverter:
38
38
  email_column: str,
39
39
  hem_column: Optional[str],
40
40
  search_keys: Dict[str, SearchKey],
41
- unnest_search_keys: Optional[List[str]] = None,
42
41
  logger: Optional[logging.Logger] = None,
43
42
  ):
44
43
  self.email_column = email_column
45
44
  self.hem_column = hem_column
46
45
  self.search_keys = search_keys
47
- self.unnest_search_keys = unnest_search_keys
48
46
  if logger is not None:
49
47
  self.logger = logger
50
48
  else:
@@ -82,12 +80,9 @@ class EmailSearchKeyConverter:
82
80
  del self.search_keys[self.email_column]
83
81
  return df
84
82
  self.search_keys[self.HEM_COLUMN_NAME] = SearchKey.HEM
85
- self.unnest_search_keys.append(self.HEM_COLUMN_NAME)
86
83
  self.email_converted_to_hem = True
87
84
 
88
85
  del self.search_keys[self.email_column]
89
- if self.email_column in self.unnest_search_keys:
90
- self.unnest_search_keys.remove(self.email_column)
91
86
 
92
87
  df[self.EMAIL_ONE_DOMAIN_COLUMN_NAME] = df[self.email_column].apply(self._email_to_one_domain)
93
88
 
@@ -81,7 +81,8 @@ class FeaturesValidator:
81
81
  return [
82
82
  i
83
83
  for i in df
84
- if (is_string_dtype(df[i]) or is_integer_dtype(df[i])) and (df[i].nunique(dropna=False) / row_count >= 0.95)
84
+ if (is_object_dtype(df[i]) or is_string_dtype(df[i]) or is_integer_dtype(df[i]))
85
+ and (df[i].nunique(dropna=False) / row_count >= 0.85)
85
86
  ]
86
87
 
87
88
  @staticmethod
@@ -55,7 +55,7 @@ def _get_execution_ide() -> str:
55
55
  def get_track_metrics(client_ip: Optional[str] = None, client_visitorid: Optional[str] = None) -> dict:
56
56
  # default values
57
57
  track = {"ide": _get_execution_ide()}
58
- ident_res = "https://api.ipify.org"
58
+ ident_res = "https://api64.ipify.org"
59
59
 
60
60
  try:
61
61
  track["hostname"] = socket.gethostname()
@@ -74,17 +74,20 @@ def get_track_metrics(client_ip: Optional[str] = None, client_visitorid: Optiona
74
74
  display(
75
75
  Javascript(
76
76
  """
77
- import('https://upgini.github.io/upgini/js/a.js')
77
+ async function getVisitorId() {
78
+ return import('https://upgini.github.io/upgini/js/a.js')
78
79
  .then(FingerprintJS => FingerprintJS.load())
79
80
  .then(fp => fp.get())
80
- .then(result => window.visitorId = result.visitorId);
81
+ .then(result => result.visitorId);
82
+ }
81
83
  """
82
84
  )
83
85
  )
84
- track["visitorId"] = output.eval_js("window.visitorId", timeout_sec=10)
86
+ track["visitorId"] = output.eval_js("getVisitorId()", timeout_sec=30)
85
87
  except Exception as e:
86
88
  track["err"] = str(e)
87
- track["visitorId"] = "None"
89
+ if "visitorId" not in track:
90
+ track["visitorId"] = "None"
88
91
  if client_ip:
89
92
  track["ip"] = client_ip
90
93
  else:
@@ -95,16 +98,19 @@ def get_track_metrics(client_ip: Optional[str] = None, client_visitorid: Optiona
95
98
  display(
96
99
  Javascript(
97
100
  f"""
98
- fetch("{ident_res}")
101
+ async function getIP() {{
102
+ return fetch("{ident_res}")
99
103
  .then(response => response.text())
100
- .then(data => window.clientIP = data);
104
+ .then(data => data);
105
+ }}
101
106
  """
102
107
  )
103
108
  )
104
- track["ip"] = output.eval_js("window.clientIP", timeout_sec=10)
109
+ track["ip"] = output.eval_js("getIP()", timeout_sec=10)
105
110
  except Exception as e:
106
111
  track["err"] = str(e)
107
- track["ip"] = "0.0.0.0"
112
+ if "ip" not in track:
113
+ track["ip"] = "0.0.0.0"
108
114
 
109
115
  elif track["ide"] == "binder":
110
116
  try:
@@ -116,8 +122,10 @@ def get_track_metrics(client_ip: Optional[str] = None, client_visitorid: Optiona
116
122
  track["visitorId"] = sha256(os.environ["CLIENT_IP"].encode()).hexdigest()
117
123
  except Exception as e:
118
124
  track["err"] = str(e)
119
- track["ip"] = "0.0.0.0"
120
- track["visitorId"] = "None"
125
+ if "ip" not in track:
126
+ track["ip"] = "0.0.0.0"
127
+ if "visitorId" not in track:
128
+ track["visitorId"] = "None"
121
129
 
122
130
  elif track["ide"] == "kaggle":
123
131
  try:
@@ -136,8 +144,8 @@ def get_track_metrics(client_ip: Optional[str] = None, client_visitorid: Optiona
136
144
  raise Exception(err)
137
145
  except Exception as e:
138
146
  track["err"] = str(e)
139
- track["ip"] = "0.0.0.0"
140
- track["visitorId"] = "None"
147
+ if "visitorId" not in track:
148
+ track["visitorId"] = "None"
141
149
  else:
142
150
  try:
143
151
  if client_ip:
@@ -150,5 +158,9 @@ def get_track_metrics(client_ip: Optional[str] = None, client_visitorid: Optiona
150
158
  track["visitorId"] = sha256(str(getnode()).encode()).hexdigest()
151
159
  except Exception as e:
152
160
  track["err"] = str(e)
161
+ if "visitorId" not in track:
162
+ track["visitorId"] = "None"
163
+ if "ip" not in track:
164
+ track["ip"] = "0.0.0.0"
153
165
 
154
166
  return track
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: upgini
3
- Version: 1.1.275a1
3
+ Version: 1.1.275a99
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Home-page: https://upgini.com/
6
6
  Author: Upgini Developers
@@ -28,7 +28,7 @@ Description-Content-Type: text/markdown
28
28
  License-File: LICENSE
29
29
  Requires-Dist: python-dateutil >=2.8.0
30
30
  Requires-Dist: requests >=2.8.0
31
- Requires-Dist: pandas <2.0.0,>=1.1.0
31
+ Requires-Dist: pandas <3.0.0,>=1.1.0
32
32
  Requires-Dist: numpy >=1.19.0
33
33
  Requires-Dist: scikit-learn >=1.3.0
34
34
  Requires-Dist: pydantic <2.0.0,>=1.8.2
@@ -1,12 +1,12 @@
1
1
  upgini/__init__.py,sha256=asENHgEVHQBIkV-e_0IhE_ZWqkCG6398U3ZLrNzAH6k,407
2
2
  upgini/ads.py,sha256=mre6xn44wcC_fg63iLT_kTh4mViZqR9AKRJZAtpQz8Y,2592
3
- upgini/dataset.py,sha256=g10BnbayclZMno9mAabpz_Zu0iyMiW0f_jOwt_xJr8U,45947
3
+ upgini/dataset.py,sha256=xb4gIANyGbdcuM8Awyq2pJPiH_3k_LEbETApJgAoRBA,45529
4
4
  upgini/errors.py,sha256=pdzQl3MKuK52yvncxMWMRWeSIOGhUFzpQoszoRFBOk0,958
5
- upgini/features_enricher.py,sha256=CgUBRCPW_itgBfaup3Tg_yfPYMbQpufoOqu4yYvn6VU,179316
5
+ upgini/features_enricher.py,sha256=A03SPhpJNxpZiAq6aSKiVOG6mqo3YrZ9MQRwkk8_OSg,176071
6
6
  upgini/fingerprint.js,sha256=VygVIQlN1v4NGZfjHqtRogOw8zjTnnMNJg_f7M5iGQU,33442
7
7
  upgini/http.py,sha256=zaO86LBBLmkieGbgYifk29eVoPCxXimZQ8YkQtKcM0I,42244
8
- upgini/metadata.py,sha256=FFwTnoMxdJ-7oKXbRgght1yk7e2u90WpeqljKDWUj18,10106
9
- upgini/metrics.py,sha256=VmxVc-plbRPZ1U3Ve3E-FZkhYqi0X2r7x8H5L-shux4,29058
8
+ upgini/metadata.py,sha256=fwVxtkR6Mn4iRoOqV6BfMJvJrx65I3YwZUMbZjhPyOI,9673
9
+ upgini/metrics.py,sha256=tGzdn0jgup86OlH_GS4eoza8ZJZ9wgaJr7SaX3Upwzo,29652
10
10
  upgini/search_task.py,sha256=tmJ17WUxv3J5NWrYUJB_NKdZ792Ifz8Z8UnDXeQnpss,17077
11
11
  upgini/spinner.py,sha256=Dm1dQ5F_z_Ua2odLxZX7OypcOX9tSx_vE5MGaKtUmfw,1118
12
12
  upgini/version_validator.py,sha256=rDIncP6BEko4J2F2hUcMOtKm_vZbI4ICWcNcw8hrwM4,1400
@@ -15,38 +15,38 @@ upgini/ads_management/ads_manager.py,sha256=fP4Yqx3h2Snw5X335TbXEwFoupq1RYsE7y0P
15
15
  upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
16
  upgini/autofe/all_operands.py,sha256=H66wqVLD-H9k8A4-q2wslhV9QaNxlb49f8YiT0Xfkps,2356
17
17
  upgini/autofe/binary.py,sha256=f8LQqZi9zyaMUAv-jASMmWNA_vT05ncYCjZq0qx3USs,3972
18
- upgini/autofe/date.py,sha256=cc0GMAJR0QZOI_Qp2V5UDklaXLNS_79O1GhU6GlOYzg,3895
18
+ upgini/autofe/date.py,sha256=408p8P2OTPM2D3LsEGGtaiCepKGgM1BbOCQNRzAmI6c,4223
19
19
  upgini/autofe/feature.py,sha256=2FQRGtIumNz60hFAjfLReaY18SI7HxzYZOoC5avzSjQ,11847
20
20
  upgini/autofe/groupby.py,sha256=iXRfOmOc84ooSzRhsh9GmmG7rTafX0-ekXko8s9Qs68,3089
21
21
  upgini/autofe/operand.py,sha256=dhtToPDGWtP_0u_RjayUpezJJZAgq_TzNbPH0bI9OXI,2805
22
22
  upgini/autofe/unary.py,sha256=YRTzQLttbDdOnkogWBPnBexpu7uHWSLSFAxSCu3iFdY,3145
23
23
  upgini/autofe/vector.py,sha256=5qhI_bdwaWM1l7fgCkx1tMt9R9gxWzoYCl-7WO4KiOs,604
24
24
  upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
- upgini/data_source/data_source_publisher.py,sha256=J2lrpPuysUHPeqTSfoybBtPRTBCFu7R5KzaakhjaRDc,16485
25
+ upgini/data_source/data_source_publisher.py,sha256=taRzyGgrPrTTSGw4Y-Ca5k4bf30aiTa68rxqT9zfqeI,16478
26
26
  upgini/mdc/__init__.py,sha256=ETDh3JKbrDdPMOECiYLAa8lvKYe68mv4IY6fZa9FimA,1126
27
27
  upgini/mdc/context.py,sha256=Sl1S_InKlzzRxYqwJ2k24lawJdCKWgGJ-RIRfvzWJrk,1468
28
28
  upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
- upgini/normalizer/phone_normalizer.py,sha256=lhwsPEnfyjeIsndW2EcQGZksXYsfxaQ1ghAzVYoDRKM,9927
29
+ upgini/normalizer/phone_normalizer.py,sha256=_SYMX4GTgwzRXArK54Jp3vUBE5d4jZxSVyze-0tqzg0,9996
30
30
  upgini/resource_bundle/__init__.py,sha256=hdvbqL0b0xMWbY6-kiYGsW1ro2GMiWpxxsO9uCv-h9Q,8379
31
31
  upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
32
- upgini/resource_bundle/strings.properties,sha256=AK5xktWWYa0smEa_ZVT7BFlXPSx7M_NTMIfXhgsnE2Y,26177
32
+ upgini/resource_bundle/strings.properties,sha256=1O779a0-Ai0j7W-Z5AznvjuV69YkJvgGhJda-6VMLOQ,26287
33
33
  upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
34
34
  upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
35
  upgini/sampler/base.py,sha256=CC-DvPbrN7zp5--SVFuUqkVmdWM_5F7R0Do98ETV82U,6421
36
36
  upgini/sampler/random_under_sampler.py,sha256=XU4c2swPIFxVXHOPpxgM2bUao0Xm-aoMmd6fKjIuV5s,4068
37
37
  upgini/sampler/utils.py,sha256=PYOk3kKSnFlyxcpdtDNLBEEhTB4lO_iP7pQHqeUcmAc,20211
38
38
  upgini/utils/__init__.py,sha256=dQ4-s8-sZ5eOBZ-mH3gEwDHTdI0wI1bUAVgVqUKKPx4,786
39
- upgini/utils/base_search_key_detector.py,sha256=VvEdamjJT1wypsH6NAfOkPp7dHo7nxhl7LhwX7Z9N5w,1025
39
+ upgini/utils/base_search_key_detector.py,sha256=DGwhXLvc8i5VZWMDr0rncFfV5GEHdsCSnLGon_W9TPs,859
40
40
  upgini/utils/blocked_time_series.py,sha256=dMz5ewk3PsoeOrc3lDzInCVPS9u_2XQkV0W6PuMMjPg,3380
41
41
  upgini/utils/country_utils.py,sha256=1KXhLSNqkNYVL3on8-zK0Arc_SspUH7AMZvGZICysOU,6462
42
42
  upgini/utils/custom_loss_utils.py,sha256=DBslpjWGPt7xTeypt78baR59012SYphbPsO_YLKdilo,3972
43
43
  upgini/utils/cv_utils.py,sha256=Tn01RJvpZGZh0PUQUimlBkV-AXwe7s6yjCNFtw352Uc,3525
44
- upgini/utils/datetime_utils.py,sha256=4ii5WphAHlb_NRmdJx35VZpTarJbAr-AnDw3XSzUSow,10346
45
- upgini/utils/deduplicate_utils.py,sha256=Zvs7zW4QzaERQmJNPrTVf2ZTVBkBLOycFCzyMwtXuV8,8770
44
+ upgini/utils/datetime_utils.py,sha256=XciFOIYI4Zi7PqQS8dHxuPDEtdtwXbOrWsiAa04v2J4,10511
45
+ upgini/utils/deduplicate_utils.py,sha256=6AbARehUCghJZ4PppFtrej2s3gFRruh41MEm6mzakHs,8607
46
46
  upgini/utils/display_utils.py,sha256=LKoSwjrE0xgS5_cqVhc2og2CQ1UCZ1nTI2VKboIhoQA,10858
47
- upgini/utils/email_utils.py,sha256=0EPCxMU-huzTgb_vySiAQ8tmSUhS31Mz2BpaHGwwYO4,3772
47
+ upgini/utils/email_utils.py,sha256=3CvHXTSzlgLyGsQOXfRYVfFhfPy6OXG4uXOBWRaLfHg,3479
48
48
  upgini/utils/fallback_progress_bar.py,sha256=cdbd1XGcWm4Ed4eAqV2_St3z7uC_kkH22gEyrN5ub6M,1090
49
- upgini/utils/features_validator.py,sha256=P-dfjBLAMxgzOcUX1Jo1bhVp8-8WyTyF3Ef0YZ5nfRI,3269
49
+ upgini/utils/features_validator.py,sha256=PgKNt5dyqfErTvjtRNNUS9g7GFqHBtAtnsfA-V5UO1A,3307
50
50
  upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
51
51
  upgini/utils/ip_utils.py,sha256=Zf3F2cnQmOCH09QLQHetpjMFu1PnD0cTmDymn0SnSy8,1672
52
52
  upgini/utils/phone_utils.py,sha256=JNSkF8G6mgsN8Czy11pamaJdsY6rBINEMpi7jbVt_RA,408
@@ -54,10 +54,10 @@ upgini/utils/postal_code_utils.py,sha256=_8CR9tBqsPptQsmMUvnrCAmBaMIQSWH3JfJ4ly3
54
54
  upgini/utils/progress_bar.py,sha256=iNXyqT3vKCeHpfiG5HHwr7Lk2cTtKViM93Fl8iZnjGc,1564
55
55
  upgini/utils/sklearn_ext.py,sha256=e1aMNXk1zUt7uFnl0FcUF0zOnaXSE7z5xBHmJPknUVs,44014
56
56
  upgini/utils/target_utils.py,sha256=9K67tkY7LWhQMO-vbbPqBaO-KriAmg_6fVz5RQRaLQc,7802
57
- upgini/utils/track_info.py,sha256=EPcJ13Jqa17_T0JjM37Ac9kWDz5Zk0GVsIZKutOb8aU,5207
57
+ upgini/utils/track_info.py,sha256=p8gmuHhLamZF5JG7K9DeK-PcytQhlFCR29lyRr-wq_U,5665
58
58
  upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
59
- upgini-1.1.275a1.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
60
- upgini-1.1.275a1.dist-info/METADATA,sha256=ocZUhdmjsYXKoCXt0W3M4gfPGQ8UlFtQlYIjdD_6_w0,48158
61
- upgini-1.1.275a1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
62
- upgini-1.1.275a1.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
63
- upgini-1.1.275a1.dist-info/RECORD,,
59
+ upgini-1.1.275a99.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
60
+ upgini-1.1.275a99.dist-info/METADATA,sha256=6wFwtaOYKQ4o9mZBpQlJqST1_r1YaTkwqgYAi7zkkHM,48159
61
+ upgini-1.1.275a99.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
62
+ upgini-1.1.275a99.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
63
+ upgini-1.1.275a99.dist-info/RECORD,,