upgini 1.1.275__tar.gz → 1.1.275a99__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (85)
  1. {upgini-1.1.275/src/upgini.egg-info → upgini-1.1.275a99}/PKG-INFO +1 -1
  2. {upgini-1.1.275 → upgini-1.1.275a99}/setup.py +1 -1
  3. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/ads.py +2 -6
  4. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/dataset.py +3 -4
  5. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/features_enricher.py +2 -4
  6. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/utils/__init__.py +2 -3
  7. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/utils/country_utils.py +2 -2
  8. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/utils/datetime_utils.py +4 -4
  9. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/utils/email_utils.py +2 -2
  10. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/utils/target_utils.py +1 -1
  11. {upgini-1.1.275 → upgini-1.1.275a99/src/upgini.egg-info}/PKG-INFO +1 -1
  12. {upgini-1.1.275 → upgini-1.1.275a99}/LICENSE +0 -0
  13. {upgini-1.1.275 → upgini-1.1.275a99}/README.md +0 -0
  14. {upgini-1.1.275 → upgini-1.1.275a99}/pyproject.toml +0 -0
  15. {upgini-1.1.275 → upgini-1.1.275a99}/setup.cfg +0 -0
  16. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/__init__.py +0 -0
  17. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/ads_management/__init__.py +0 -0
  18. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/ads_management/ads_manager.py +0 -0
  19. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/autofe/__init__.py +0 -0
  20. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/autofe/all_operands.py +0 -0
  21. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/autofe/binary.py +0 -0
  22. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/autofe/date.py +0 -0
  23. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/autofe/feature.py +0 -0
  24. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/autofe/groupby.py +0 -0
  25. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/autofe/operand.py +0 -0
  26. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/autofe/unary.py +0 -0
  27. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/autofe/vector.py +0 -0
  28. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/data_source/__init__.py +0 -0
  29. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/data_source/data_source_publisher.py +0 -0
  30. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/errors.py +0 -0
  31. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/fingerprint.js +0 -0
  32. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/http.py +0 -0
  33. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/mdc/__init__.py +0 -0
  34. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/mdc/context.py +0 -0
  35. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/metadata.py +0 -0
  36. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/metrics.py +0 -0
  37. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/normalizer/__init__.py +0 -0
  38. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/normalizer/phone_normalizer.py +0 -0
  39. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/resource_bundle/__init__.py +0 -0
  40. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/resource_bundle/exceptions.py +0 -0
  41. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/resource_bundle/strings.properties +0 -0
  42. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  43. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/sampler/__init__.py +0 -0
  44. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/sampler/base.py +0 -0
  45. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/sampler/random_under_sampler.py +0 -0
  46. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/sampler/utils.py +0 -0
  47. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/search_task.py +0 -0
  48. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/spinner.py +0 -0
  49. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/utils/base_search_key_detector.py +0 -0
  50. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/utils/blocked_time_series.py +0 -0
  51. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/utils/custom_loss_utils.py +0 -0
  52. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/utils/cv_utils.py +0 -0
  53. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/utils/deduplicate_utils.py +0 -0
  54. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/utils/display_utils.py +0 -0
  55. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/utils/fallback_progress_bar.py +0 -0
  56. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/utils/features_validator.py +0 -0
  57. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/utils/format.py +0 -0
  58. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/utils/ip_utils.py +0 -0
  59. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/utils/phone_utils.py +0 -0
  60. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/utils/postal_code_utils.py +0 -0
  61. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/utils/progress_bar.py +0 -0
  62. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/utils/sklearn_ext.py +0 -0
  63. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/utils/track_info.py +0 -0
  64. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/utils/warning_counter.py +0 -0
  65. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini/version_validator.py +0 -0
  66. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini.egg-info/SOURCES.txt +0 -0
  67. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini.egg-info/dependency_links.txt +0 -0
  68. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini.egg-info/requires.txt +0 -0
  69. {upgini-1.1.275 → upgini-1.1.275a99}/src/upgini.egg-info/top_level.txt +0 -0
  70. {upgini-1.1.275 → upgini-1.1.275a99}/tests/test_autofe_operands.py +0 -0
  71. {upgini-1.1.275 → upgini-1.1.275a99}/tests/test_binary_dataset.py +0 -0
  72. {upgini-1.1.275 → upgini-1.1.275a99}/tests/test_blocked_time_series.py +0 -0
  73. {upgini-1.1.275 → upgini-1.1.275a99}/tests/test_categorical_dataset.py +0 -0
  74. {upgini-1.1.275 → upgini-1.1.275a99}/tests/test_continuous_dataset.py +0 -0
  75. {upgini-1.1.275 → upgini-1.1.275a99}/tests/test_country_utils.py +0 -0
  76. {upgini-1.1.275 → upgini-1.1.275a99}/tests/test_custom_loss_utils.py +0 -0
  77. {upgini-1.1.275 → upgini-1.1.275a99}/tests/test_datetime_utils.py +0 -0
  78. {upgini-1.1.275 → upgini-1.1.275a99}/tests/test_email_utils.py +0 -0
  79. {upgini-1.1.275 → upgini-1.1.275a99}/tests/test_etalon_validation.py +0 -0
  80. {upgini-1.1.275 → upgini-1.1.275a99}/tests/test_features_enricher.py +0 -0
  81. {upgini-1.1.275 → upgini-1.1.275a99}/tests/test_metrics.py +0 -0
  82. {upgini-1.1.275 → upgini-1.1.275a99}/tests/test_phone_utils.py +0 -0
  83. {upgini-1.1.275 → upgini-1.1.275a99}/tests/test_postal_code_utils.py +0 -0
  84. {upgini-1.1.275 → upgini-1.1.275a99}/tests/test_target_utils.py +0 -0
  85. {upgini-1.1.275 → upgini-1.1.275a99}/tests/test_widget.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: upgini
3
- Version: 1.1.275
3
+ Version: 1.1.275a99
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Home-page: https://upgini.com/
6
6
  Author: Upgini Developers
@@ -40,7 +40,7 @@ def send_log(msg: str):
40
40
 
41
41
 
42
42
  here = Path(__file__).parent.resolve()
43
- version = "1.1.275"
43
+ version = "1.1.275a99"
44
44
  try:
45
45
  send_log(f"Start setup PyLib version {version}")
46
46
  setup(
@@ -5,7 +5,7 @@ from typing import Dict, Optional
5
5
 
6
6
  import numpy as np
7
7
  import pandas as pd
8
- from pandas.api.types import is_object_dtype, is_string_dtype
8
+ from pandas.api.types import is_string_dtype
9
9
 
10
10
  from upgini import SearchKey
11
11
  from upgini.http import get_rest_client
@@ -34,11 +34,7 @@ def upload_user_ads(name: str, df: pd.DataFrame, search_keys: Dict[str, SearchKe
34
34
  if df[column_name].notnull().sum() < min_valid_rows_count:
35
35
  raise ValueError(bundle.get("ads_upload_to_many_empty_rows"))
36
36
  meaning_type = search_keys[column_name].value
37
- if (
38
- meaning_type == FileColumnMeaningType.MSISDN
39
- and not is_string_dtype(df[column_name])
40
- and not is_object_dtype(df[column_name])
41
- ):
37
+ if meaning_type == FileColumnMeaningType.MSISDN and not is_string_dtype(df[column_name]):
42
38
  df[column_name] = df[column_name].values.astype(np.int64).astype("string") # type: ignore
43
39
  else:
44
40
  meaning_type = FileColumnMeaningType.FEATURE
@@ -17,7 +17,6 @@ from pandas.api.types import (
17
17
  is_numeric_dtype,
18
18
  is_period_dtype,
19
19
  is_string_dtype,
20
- is_object_dtype,
21
20
  )
22
21
 
23
22
  from upgini.errors import ValidationError
@@ -220,7 +219,7 @@ class Dataset: # (pd.DataFrame):
220
219
  """Check that string values less than maximum characters for LLM"""
221
220
  # self.logger.info("Validate too long string values")
222
221
  for col in self.data.columns:
223
- if is_string_dtype(self.data[col]) or is_object_dtype(self.data[col]):
222
+ if is_string_dtype(self.data[col]):
224
223
  max_length: int = self.data[col].astype("str").str.len().max()
225
224
  if max_length > self.MAX_STRING_FEATURE_LENGTH:
226
225
  self.data[col] = self.data[col].astype("str").str.slice(stop=self.MAX_STRING_FEATURE_LENGTH)
@@ -351,7 +350,7 @@ class Dataset: # (pd.DataFrame):
351
350
  if postal_code is not None and postal_code in self.data.columns:
352
351
  # self.logger.info("Normalize postal code")
353
352
 
354
- if is_string_dtype(self.data[postal_code]) or is_object_dtype(self.data[postal_code]):
353
+ if is_string_dtype(self.data[postal_code]):
355
354
  try:
356
355
  self.data[postal_code] = self.data[postal_code].astype("float64").astype("Int64").astype("string")
357
356
  except Exception:
@@ -822,7 +821,7 @@ class Dataset: # (pd.DataFrame):
822
821
  return DataType.INT
823
822
  elif is_float_dtype(pandas_data_type):
824
823
  return DataType.DECIMAL
825
- elif is_string_dtype(pandas_data_type) or is_object_dtype(pandas_data_type):
824
+ elif is_string_dtype(pandas_data_type):
826
825
  return DataType.STRING
827
826
  else:
828
827
  msg = self.bundle.get("dataset_invalid_column_type").format(column_name, pandas_data_type)
@@ -21,7 +21,6 @@ from pandas.api.types import (
21
21
  is_bool,
22
22
  is_datetime64_any_dtype,
23
23
  is_numeric_dtype,
24
- is_object_dtype,
25
24
  is_period_dtype,
26
25
  is_string_dtype,
27
26
  )
@@ -2983,7 +2982,7 @@ class FeaturesEnricher(TransformerMixin):
2983
2982
 
2984
2983
  def __correct_target(self, df: pd.DataFrame) -> pd.DataFrame:
2985
2984
  target = df[self.TARGET_NAME]
2986
- if is_string_dtype(target) or is_object_dtype(target):
2985
+ if is_string_dtype(target):
2987
2986
  maybe_numeric_target = pd.to_numeric(target, errors="coerce")
2988
2987
  # If less than 5% is non numeric then leave this rows with NaN target and later it will be dropped
2989
2988
  if maybe_numeric_target.isna().sum() <= _num_samples(df) * 0.05:
@@ -3383,8 +3382,7 @@ class FeaturesEnricher(TransformerMixin):
3383
3382
  valid_search_keys[column_name] = SearchKey.CUSTOM_KEY
3384
3383
  else:
3385
3384
  if x[column_name].isnull().all() or (
3386
- (is_string_dtype(x[column_name]) or is_object_dtype(x[column_name]))
3387
- and (x[column_name].astype("string").str.strip() == "").all()
3385
+ is_string_dtype(x[column_name]) and (x[column_name].astype("string").str.strip() == "").all()
3388
3386
  ):
3389
3387
  raise ValidationError(self.bundle.get("empty_search_key").format(column_name))
3390
3388
 
@@ -2,7 +2,7 @@ import itertools
2
2
  from typing import List, Tuple
3
3
 
4
4
  import pandas as pd
5
- from pandas.api.types import is_string_dtype, is_object_dtype
5
+ from pandas.api.types import is_string_dtype
6
6
 
7
7
 
8
8
  def combine_search_keys(search_keys: List[str]) -> List[Tuple[str]]:
@@ -20,6 +20,5 @@ def find_numbers_with_decimal_comma(df: pd.DataFrame) -> pd.DataFrame:
20
20
  return [
21
21
  col
22
22
  for col in tmp.columns
23
- if (is_string_dtype(tmp[col]) or is_object_dtype(tmp[col]))
24
- and tmp[col].astype("string").str.match("^[0-9]+,[0-9]*$").any()
23
+ if is_string_dtype(tmp[col]) and tmp[col].astype("string").str.match("^[0-9]+,[0-9]*$").any()
25
24
  ]
@@ -1,5 +1,5 @@
1
1
  import pandas as pd
2
- from pandas.api.types import is_string_dtype, is_object_dtype
2
+ from pandas.api.types import is_string_dtype
3
3
 
4
4
  from upgini.utils.base_search_key_detector import BaseSearchKeyDetector
5
5
 
@@ -9,7 +9,7 @@ class CountrySearchKeyDetector(BaseSearchKeyDetector):
9
9
  return "country" in str(column_name).lower()
10
10
 
11
11
  def _is_search_key_by_values(self, column: pd.Series) -> bool:
12
- if not is_string_dtype(column) and not is_object_dtype(column):
12
+ if not is_string_dtype(column):
13
13
  return False
14
14
 
15
15
  all_count = len(column)
@@ -6,10 +6,7 @@ from typing import Dict, List, Optional
6
6
  import numpy as np
7
7
  import pandas as pd
8
8
  from dateutil.relativedelta import relativedelta
9
- from pandas.api.types import (
10
- is_numeric_dtype,
11
- is_period_dtype,
12
- )
9
+ from pandas.api.types import is_numeric_dtype, is_period_dtype, is_string_dtype
13
10
 
14
11
  from upgini.errors import ValidationError
15
12
  from upgini.metadata import SearchKey
@@ -81,6 +78,9 @@ class DateTimeSearchKeyConverter:
81
78
  df[self.date_column] = df[self.date_column].apply(lambda x: x.replace(tzinfo=None))
82
79
  elif isinstance(df[self.date_column].values[0], datetime.date):
83
80
  df[self.date_column] = pd.to_datetime(df[self.date_column], errors="coerce")
81
+ elif is_string_dtype(df[self.date_column]):
82
+ df[self.date_column] = df[self.date_column].apply(self.clean_date)
83
+ df[self.date_column] = self.parse_date(df)
84
84
  elif is_period_dtype(df[self.date_column]):
85
85
  df[self.date_column] = pd.to_datetime(df[self.date_column].astype("string"))
86
86
  elif is_numeric_dtype(df[self.date_column]):
@@ -4,7 +4,7 @@ from hashlib import sha256
4
4
  from typing import Dict, List, Optional
5
5
 
6
6
  import pandas as pd
7
- from pandas.api.types import is_string_dtype, is_object_dtype
7
+ from pandas.api.types import is_string_dtype
8
8
  from upgini.resource_bundle import bundle
9
9
 
10
10
  from upgini.metadata import SearchKey
@@ -18,7 +18,7 @@ class EmailSearchKeyDetector(BaseSearchKeyDetector):
18
18
  return str(column_name).lower() in ["email", "e_mail", "e-mail"]
19
19
 
20
20
  def _is_search_key_by_values(self, column: pd.Series) -> bool:
21
- if not is_string_dtype(column) and not is_object_dtype:
21
+ if not is_string_dtype(column):
22
22
  return False
23
23
  if not column.astype("string").str.contains("@").any():
24
24
  return False
@@ -107,7 +107,7 @@ def balance_undersample(
107
107
  min_class_count = vc[min_class_value]
108
108
 
109
109
  min_class_percent = imbalance_threshold / target_classes_count
110
- min_class_threshold = int(min_class_percent * count)
110
+ min_class_threshold = min_class_percent * count
111
111
 
112
112
  resampled_data = df
113
113
  df = df.copy().sort_values(by=SYSTEM_RECORD_ID)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: upgini
3
- Version: 1.1.275
3
+ Version: 1.1.275a99
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Home-page: https://upgini.com/
6
6
  Author: Upgini Developers
File without changes
File without changes
File without changes
File without changes
File without changes