upgini 1.2.29a7__tar.gz → 1.2.30a7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (67)
  1. {upgini-1.2.29a7 → upgini-1.2.30a7}/PKG-INFO +1 -1
  2. upgini-1.2.30a7/src/upgini/__about__.py +1 -0
  3. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/features_enricher.py +9 -18
  4. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/http.py +1 -1
  5. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/utils/email_utils.py +1 -1
  6. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/utils/features_validator.py +1 -13
  7. upgini-1.2.29a7/src/upgini/__about__.py +0 -1
  8. {upgini-1.2.29a7 → upgini-1.2.30a7}/.gitignore +0 -0
  9. {upgini-1.2.29a7 → upgini-1.2.30a7}/LICENSE +0 -0
  10. {upgini-1.2.29a7 → upgini-1.2.30a7}/README.md +0 -0
  11. {upgini-1.2.29a7 → upgini-1.2.30a7}/pyproject.toml +0 -0
  12. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/__init__.py +0 -0
  13. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/ads.py +0 -0
  14. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/ads_management/__init__.py +0 -0
  15. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/ads_management/ads_manager.py +0 -0
  16. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/autofe/__init__.py +0 -0
  17. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/autofe/all_operands.py +0 -0
  18. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/autofe/binary.py +0 -0
  19. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/autofe/date.py +0 -0
  20. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/autofe/feature.py +0 -0
  21. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/autofe/groupby.py +0 -0
  22. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/autofe/operand.py +0 -0
  23. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/autofe/unary.py +0 -0
  24. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/autofe/vector.py +0 -0
  25. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/data_source/__init__.py +0 -0
  26. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/data_source/data_source_publisher.py +0 -0
  27. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/dataset.py +0 -0
  28. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/errors.py +0 -0
  29. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/lazy_import.py +0 -0
  30. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/mdc/__init__.py +0 -0
  31. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/mdc/context.py +0 -0
  32. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/metadata.py +0 -0
  33. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/metrics.py +0 -0
  34. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/normalizer/__init__.py +0 -0
  35. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/normalizer/normalize_utils.py +0 -0
  36. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/resource_bundle/__init__.py +0 -0
  37. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/resource_bundle/exceptions.py +0 -0
  38. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/resource_bundle/strings.properties +0 -0
  39. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  40. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/sampler/__init__.py +0 -0
  41. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/sampler/base.py +0 -0
  42. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/sampler/random_under_sampler.py +0 -0
  43. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/sampler/utils.py +0 -0
  44. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/search_task.py +0 -0
  45. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/spinner.py +0 -0
  46. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  47. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/utils/__init__.py +0 -0
  48. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/utils/base_search_key_detector.py +0 -0
  49. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/utils/blocked_time_series.py +0 -0
  50. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/utils/country_utils.py +0 -0
  51. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/utils/custom_loss_utils.py +0 -0
  52. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/utils/cv_utils.py +0 -0
  53. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/utils/datetime_utils.py +0 -0
  54. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/utils/deduplicate_utils.py +0 -0
  55. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/utils/display_utils.py +0 -0
  56. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/utils/fallback_progress_bar.py +0 -0
  57. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/utils/feature_info.py +0 -0
  58. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/utils/format.py +0 -0
  59. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/utils/ip_utils.py +0 -0
  60. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/utils/phone_utils.py +0 -0
  61. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/utils/postal_code_utils.py +0 -0
  62. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/utils/progress_bar.py +0 -0
  63. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/utils/sklearn_ext.py +0 -0
  64. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/utils/target_utils.py +0 -0
  65. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/utils/track_info.py +0 -0
  66. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/utils/warning_counter.py +0 -0
  67. {upgini-1.2.29a7 → upgini-1.2.30a7}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.29a7
3
+ Version: 1.2.30a7
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.2.30a7"
@@ -1447,11 +1447,7 @@ class FeaturesEnricher(TransformerMixin):
1447
1447
  client_features = [
1448
1448
  c
1449
1449
  for c in X_sampled.columns.to_list()
1450
- if (
1451
- not self.select_features
1452
- or c in self.feature_names_
1453
- or (self.fit_columns_renaming is not None and self.fit_columns_renaming.get(c) in self.feature_names_)
1454
- )
1450
+ if (not self.select_features or c in self.feature_names_)
1455
1451
  and c
1456
1452
  not in (
1457
1453
  excluding_search_keys
@@ -1668,10 +1664,7 @@ class FeaturesEnricher(TransformerMixin):
1668
1664
  generated_features = []
1669
1665
  if date_column is not None:
1670
1666
  converter = DateTimeSearchKeyConverter(date_column, self.date_format, self.logger, self.bundle)
1671
- # Leave original date column values
1672
- df_with_date_features = converter.convert(df, keep_time=True)
1673
- df_with_date_features[date_column] = df[date_column]
1674
- df = df_with_date_features
1667
+ df = converter.convert(df, keep_time=True)
1675
1668
  generated_features = converter.generated_features
1676
1669
 
1677
1670
  email_columns = SearchKey.find_all_keys(search_keys, SearchKey.EMAIL)
@@ -1680,10 +1673,9 @@ class FeaturesEnricher(TransformerMixin):
1680
1673
  df = generator.generate(df)
1681
1674
  generated_features.extend(generator.generated_features)
1682
1675
 
1683
- # normalizer = Normalizer(self.bundle, self.logger)
1684
- # df, search_keys, generated_features = normalizer.normalize(df, search_keys, generated_features)
1685
- # columns_renaming = normalizer.columns_renaming
1686
- columns_renaming = {c: c for c in df.columns}
1676
+ normalizer = Normalizer(self.bundle, self.logger)
1677
+ df, search_keys, generated_features = normalizer.normalize(df, search_keys, generated_features)
1678
+ columns_renaming = normalizer.columns_renaming
1687
1679
 
1688
1680
  df, _ = clean_full_duplicates(df, logger=self.logger, bundle=self.bundle)
1689
1681
 
@@ -2113,7 +2105,7 @@ class FeaturesEnricher(TransformerMixin):
2113
2105
  date_column = SearchKey.find_key(search_keys, [SearchKey.DATE, SearchKey.DATETIME])
2114
2106
  if date_column is not None:
2115
2107
  converter = DateTimeSearchKeyConverter(date_column, self.date_format, self.logger, bundle=self.bundle)
2116
- df = converter.convert(df, keep_time=True)
2108
+ df = converter.convert(df)
2117
2109
  self.logger.info(f"Date column after convertion: {df[date_column]}")
2118
2110
  generated_features.extend(converter.generated_features)
2119
2111
  else:
@@ -2208,12 +2200,11 @@ class FeaturesEnricher(TransformerMixin):
2208
2200
 
2209
2201
  if add_fit_system_record_id:
2210
2202
  df = self.__add_fit_system_record_id(df, search_keys, SYSTEM_RECORD_ID)
2203
+ if DateTimeSearchKeyConverter.DATETIME_COL in df.columns:
2204
+ df = df.drop(columns=DateTimeSearchKeyConverter.DATETIME_COL)
2211
2205
  df = df.rename(columns={SYSTEM_RECORD_ID: SORT_ID})
2212
2206
  features_not_to_pass.append(SORT_ID)
2213
2207
 
2214
- if DateTimeSearchKeyConverter.DATETIME_COL in df.columns:
2215
- df = df.drop(columns=DateTimeSearchKeyConverter.DATETIME_COL)
2216
-
2217
2208
  # search keys might be changed after explode
2218
2209
  columns_for_system_record_id = sorted(list(search_keys.keys()) + features_for_transform)
2219
2210
  df[SYSTEM_RECORD_ID] = pd.util.hash_pandas_object(df[columns_for_system_record_id], index=False).astype(
@@ -2232,7 +2223,7 @@ class FeaturesEnricher(TransformerMixin):
2232
2223
 
2233
2224
  combined_search_keys = combine_search_keys(search_keys.keys())
2234
2225
 
2235
- df_without_features = df.drop(columns=features_not_to_pass, errors="ignore")
2226
+ df_without_features = df.drop(columns=features_not_to_pass)
2236
2227
 
2237
2228
  df_without_features, full_duplicates_warning = clean_full_duplicates(
2238
2229
  df_without_features, self.logger, bundle=self.bundle
@@ -882,7 +882,7 @@ class _RestClient:
882
882
  if content_type:
883
883
  headers[_RestClient.CONTENT_TYPE_HEADER_NAME] = content_type
884
884
  if trace_id:
885
- headers[_RestClient.TRACE_ID_HEADER_NAME] = str(trace_id)
885
+ headers[_RestClient.TRACE_ID_HEADER_NAME] = trace_id
886
886
  for header_key, header_value in additional_headers.items():
887
887
  headers[header_key] = header_value
888
888
  return headers
@@ -39,7 +39,7 @@ class EmailDomainGenerator:
39
39
  for email_col in self.email_columns:
40
40
  domain_feature = email_col + self.DOMAIN_SUFFIX
41
41
  if domain_feature not in df.columns:
42
- df[domain_feature] = df[email_col].apply(self._email_to_domain).astype("string")
42
+ df[domain_feature] = df[email_col].apply(self._email_to_domain)
43
43
  self.generated_features.append(domain_feature)
44
44
  return df
45
45
 
@@ -2,7 +2,6 @@ import logging
2
2
  from logging import Logger
3
3
  from typing import Dict, List, Optional, Tuple
4
4
 
5
- import numpy as np
6
5
  import pandas as pd
7
6
  from pandas.api.types import is_integer_dtype, is_object_dtype, is_string_dtype
8
7
 
@@ -84,21 +83,10 @@ class FeaturesValidator:
84
83
  return [
85
84
  i
86
85
  for i in df
87
- if (is_object_dtype(df[i]) or is_string_dtype(df[i]) or FeaturesValidator.__is_integer(df[i]))
86
+ if (is_object_dtype(df[i]) or is_string_dtype(df[i]) or is_integer_dtype(df[i]))
88
87
  and (df[i].nunique(dropna=False) / row_count >= 0.85)
89
88
  ]
90
89
 
91
- @staticmethod
92
- def __is_integer(series: pd.Series) -> bool:
93
- return (
94
- is_integer_dtype(series)
95
- or series.dropna()
96
- .apply(
97
- lambda f: (float.is_integer(f) and abs(f) < np.iinfo(np.int64).max) if isinstance(f, float) else False
98
- )
99
- .all()
100
- )
101
-
102
90
  @staticmethod
103
91
  def find_constant_features(df: pd.DataFrame) -> List[str]:
104
92
  return [i for i in df if df[i].nunique() <= 1]
@@ -1 +0,0 @@
1
- __version__ = "1.2.29a7"
File without changes
File without changes
File without changes
File without changes
File without changes