upgini 1.1.287a3232.post1__tar.gz → 1.1.288a0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (64)
  1. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/PKG-INFO +1 -1
  2. upgini-1.1.288a0/src/upgini/__about__.py +1 -0
  3. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/autofe/date.py +7 -18
  4. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/data_source/data_source_publisher.py +3 -0
  5. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/resource_bundle/strings.properties +1 -1
  6. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/utils/sklearn_ext.py +1 -1
  7. upgini-1.1.287a3232.post1/src/upgini/__about__.py +0 -1
  8. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/.gitignore +0 -0
  9. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/LICENSE +0 -0
  10. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/README.md +0 -0
  11. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/pyproject.toml +0 -0
  12. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/__init__.py +0 -0
  13. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/ads.py +0 -0
  14. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/ads_management/__init__.py +0 -0
  15. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/ads_management/ads_manager.py +0 -0
  16. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/autofe/__init__.py +0 -0
  17. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/autofe/all_operands.py +0 -0
  18. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/autofe/binary.py +0 -0
  19. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/autofe/feature.py +0 -0
  20. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/autofe/groupby.py +0 -0
  21. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/autofe/operand.py +0 -0
  22. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/autofe/unary.py +0 -0
  23. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/autofe/vector.py +0 -0
  24. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/data_source/__init__.py +0 -0
  25. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/dataset.py +0 -0
  26. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/errors.py +0 -0
  27. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/features_enricher.py +3 -3
  28. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/http.py +0 -0
  29. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/mdc/__init__.py +0 -0
  30. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/mdc/context.py +0 -0
  31. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/metadata.py +0 -0
  32. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/metrics.py +0 -0
  33. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/normalizer/__init__.py +0 -0
  34. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/normalizer/phone_normalizer.py +0 -0
  35. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/resource_bundle/__init__.py +0 -0
  36. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/resource_bundle/exceptions.py +0 -0
  37. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  38. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/sampler/__init__.py +0 -0
  39. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/sampler/base.py +0 -0
  40. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/sampler/random_under_sampler.py +0 -0
  41. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/sampler/utils.py +0 -0
  42. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/search_task.py +0 -0
  43. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/spinner.py +0 -0
  44. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/utils/__init__.py +0 -0
  45. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/utils/base_search_key_detector.py +0 -0
  46. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/utils/blocked_time_series.py +0 -0
  47. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/utils/country_utils.py +0 -0
  48. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/utils/custom_loss_utils.py +0 -0
  49. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/utils/cv_utils.py +0 -0
  50. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/utils/datetime_utils.py +0 -0
  51. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/utils/deduplicate_utils.py +0 -0
  52. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/utils/display_utils.py +0 -0
  53. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/utils/email_utils.py +0 -0
  54. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/utils/fallback_progress_bar.py +0 -0
  55. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/utils/features_validator.py +0 -0
  56. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/utils/format.py +0 -0
  57. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/utils/ip_utils.py +0 -0
  58. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/utils/phone_utils.py +0 -0
  59. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/utils/postal_code_utils.py +0 -0
  60. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/utils/progress_bar.py +0 -0
  61. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/utils/target_utils.py +0 -0
  62. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/utils/track_info.py +0 -0
  63. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/utils/warning_counter.py +0 -0
  64. {upgini-1.1.287a3232.post1 → upgini-1.1.288a0}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.1.287a3232.post1
3
+ Version: 1.1.288a0
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.1.288a0"
@@ -2,7 +2,6 @@ from typing import Any, Dict, List, Optional, Union
2
2
 
3
3
  import numpy as np
4
4
  import pandas as pd
5
- import datetime
6
5
  from pandas.core.arrays.timedeltas import TimedeltaArray
7
6
  from pydantic import BaseModel, validator
8
7
 
@@ -22,20 +21,6 @@ class DateDiffMixin(BaseModel):
22
21
 
23
22
  return pd.to_datetime(x, unit=unit)
24
23
 
25
- def _convert_diff_to_unit(self, diff: Union[pd.Series, TimedeltaArray]) -> Union[pd.Series, TimedeltaArray]:
26
- if self.diff_unit == "M":
27
- raise Exception("Unsupported difference unit: Month")
28
- elif self.diff_unit == "D":
29
- if isinstance(diff, pd.Series) and diff.dtype == "object":
30
- return diff.apply(lambda x: None if isinstance(x, float) and np.isnan(x) else x.days)
31
- else:
32
- return diff / np.timedelta64(1, self.diff_unit)
33
- elif self.diff_unit == "Y":
34
- if isinstance(diff, TimedeltaArray):
35
- return (diff / 365 / 24 / 60 / 60 / 10**9).astype(int)
36
- else:
37
- return (diff / 365 / 24 / 60 / 60 / 10**9).dt.nanoseconds
38
-
39
24
 
40
25
  class DateDiff(PandasOperand, DateDiffMixin):
41
26
  name = "date_diff"
@@ -56,8 +41,7 @@ class DateDiff(PandasOperand, DateDiffMixin):
56
41
  def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
57
42
  left = self._convert_to_date(left, self.left_unit)
58
43
  right = self._convert_to_date(right, self.right_unit)
59
- diff = self._convert_diff_to_unit(left.dt.date - right.dt.date)
60
- return self.__replace_negative(diff)
44
+ return self.__replace_negative((left - right) / np.timedelta64(1, self.diff_unit))
61
45
 
62
46
  def __replace_negative(self, x: Union[pd.DataFrame, pd.Series]):
63
47
  x[x < 0] = None
@@ -123,7 +107,12 @@ class DateListDiff(PandasOperand, DateDiffMixin):
123
107
  return pd.Series(left - right.values).apply(lambda x: self._agg(self._diff(x)))
124
108
 
125
109
  def _diff(self, x: TimedeltaArray):
126
- x = self._convert_diff_to_unit(x)
110
+ if self.diff_unit == "Y":
111
+ x = (x / 365 / 24 / 60 / 60 / 10**9).astype(int)
112
+ elif self.diff_unit == "M":
113
+ raise Exception("Unsupported difference unit: Month")
114
+ else:
115
+ x = x / np.timedelta64(1, self.diff_unit)
127
116
  return x[x > 0]
128
117
 
129
118
  def _agg(self, x):
@@ -58,6 +58,7 @@ class DataSourcePublisher:
58
58
  join_date_abs_limit_days: Optional[int] = None,
59
59
  features_for_embeddings: Optional[List[str]] = DEFAULT_GENERATE_EMBEDDINGS,
60
60
  data_table_id_to_replace: Optional[str] = None,
61
+ keep_features: Optional[List[str]] = None,
61
62
  _force_generation=False,
62
63
  _silent=False,
63
64
  ) -> str:
@@ -116,6 +117,8 @@ class DataSourcePublisher:
116
117
  request["adsDefinitionIdToReplace"] = data_table_id_to_replace
117
118
  if exclude_from_autofe_generation is not None:
118
119
  request["excludeFromGeneration"] = exclude_from_autofe_generation
120
+ if keep_features is not None:
121
+ request["keepFeatures"] = keep_features
119
122
  self.logger.info(f"Start registering data table {request}")
120
123
 
121
124
  task_id = self._rest_client.register_ads(request, trace_id)
@@ -81,7 +81,7 @@ date_and_datetime_simultanious=DATE and DATETIME search keys cannot be used simu
81
81
  email_and_hem_simultanious=EMAIL and HEM search keys cannot be used simultaneously. Choose one to keep
82
82
  postal_code_without_country=COUNTRY search key required if POSTAL_CODE is present
83
83
  multiple_search_key=Search key {} passed multiple times
84
- unregistered_only_personal_keys=Only personal search keys used. Api_key from profile.upgini.com required for EMAIL/HEM, PHONE NUMBER or IPv4 search keys\nSee docs https://github.com/upgini/upgini#-open-up-all-capabilities-of-upgini
84
+ unregistered_only_personal_keys=Only personal search keys used. Api_key from profile.upgini.com required for EMAIL/HEM, PHONE NUMBER or IPv4/IPv6 search keys\nSee docs https://github.com/upgini/upgini#-open-up-all-capabilities-of-upgini
85
85
  search_key_not_found=Column `{}` from search_keys was not found in X dataframe: {}
86
86
  numeric_search_key_not_found=Index {} in search_keys is out of bounds for {} columns of X dataframe
87
87
  unsupported_search_key_type=Unsupported type of key in search_keys: {}
@@ -320,7 +320,7 @@ def cross_validate(
320
320
  shuffle = cv.shuffle
321
321
  else:
322
322
  shuffle = False
323
- if hasattr(cv, "random_state"):
323
+ if hasattr(cv, "random_state") and shuffle:
324
324
  random_state = cv.random_state
325
325
  else:
326
326
  random_state = None
@@ -1 +0,0 @@
1
- __version__ = "1.1.287a3232-1"
File without changes
@@ -2596,9 +2596,6 @@ class FeaturesEnricher(TransformerMixin):
2596
2596
  return validated_X
2597
2597
 
2598
2598
  def _validate_y(self, X: pd.DataFrame, y) -> pd.Series:
2599
- if _num_samples(y) == 0:
2600
- raise ValidationError(self.bundle.get("y_is_empty"))
2601
-
2602
2599
  if (
2603
2600
  not isinstance(y, pd.Series)
2604
2601
  and not isinstance(y, pd.DataFrame)
@@ -2607,6 +2604,9 @@ class FeaturesEnricher(TransformerMixin):
2607
2604
  ):
2608
2605
  raise ValidationError(self.bundle.get("unsupported_y_type").format(type(y)))
2609
2606
 
2607
+ if _num_samples(y) == 0:
2608
+ raise ValidationError(self.bundle.get("y_is_empty"))
2609
+
2610
2610
  if _num_samples(X) != _num_samples(y):
2611
2611
  raise ValidationError(self.bundle.get("x_and_y_diff_size").format(_num_samples(X), _num_samples(y)))
2612
2612