upgini 1.2.154a4146.dev9__tar.gz → 1.2.155.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/PKG-INFO +2 -2
  2. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/pyproject.toml +1 -1
  3. upgini-1.2.155.dev1/src/upgini/__about__.py +1 -0
  4. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/data_source/data_source_publisher.py +4 -2
  5. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/features_enricher.py +11 -6
  6. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/metadata.py +4 -3
  7. upgini-1.2.154a4146.dev9/src/upgini/__about__.py +0 -1
  8. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/.gitignore +0 -0
  9. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/LICENSE +0 -0
  10. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/README.md +0 -0
  11. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/__init__.py +0 -0
  12. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/ads.py +0 -0
  13. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/ads_management/__init__.py +0 -0
  14. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/ads_management/ads_manager.py +0 -0
  15. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/__init__.py +0 -0
  16. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/all_operators.py +0 -0
  17. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/binary.py +0 -0
  18. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/date.py +0 -0
  19. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/feature.py +0 -0
  20. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/groupby.py +0 -0
  21. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/operator.py +0 -0
  22. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/timeseries/__init__.py +0 -0
  23. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/timeseries/base.py +0 -0
  24. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/timeseries/cross.py +0 -0
  25. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/timeseries/delta.py +0 -0
  26. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/timeseries/lag.py +0 -0
  27. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/timeseries/roll.py +0 -0
  28. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/timeseries/trend.py +0 -0
  29. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/timeseries/volatility.py +0 -0
  30. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/unary.py +0 -0
  31. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/utils.py +0 -0
  32. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/vector.py +0 -0
  33. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/data_source/__init__.py +0 -0
  34. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/dataset.py +0 -0
  35. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/errors.py +0 -0
  36. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/http.py +0 -0
  37. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/mdc/__init__.py +0 -0
  38. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/mdc/context.py +0 -0
  39. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/metrics.py +0 -0
  40. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/normalizer/__init__.py +0 -0
  41. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/normalizer/normalize_utils.py +0 -0
  42. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/resource_bundle/__init__.py +0 -0
  43. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/resource_bundle/exceptions.py +0 -0
  44. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/resource_bundle/strings.properties +0 -0
  45. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  46. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/sampler/__init__.py +0 -0
  47. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/sampler/base.py +0 -0
  48. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/sampler/random_under_sampler.py +0 -0
  49. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/sampler/utils.py +0 -0
  50. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/search_task.py +0 -0
  51. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/spinner.py +0 -0
  52. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  53. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/__init__.py +0 -0
  54. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/base_search_key_detector.py +0 -0
  55. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/blocked_time_series.py +0 -0
  56. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/config.py +0 -0
  57. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/country_utils.py +0 -0
  58. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/custom_loss_utils.py +0 -0
  59. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/cv_utils.py +0 -0
  60. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/datetime_utils.py +0 -0
  61. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/deduplicate_utils.py +0 -0
  62. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/display_utils.py +0 -0
  63. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/email_utils.py +0 -0
  64. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/fallback_progress_bar.py +0 -0
  65. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/feature_info.py +0 -0
  66. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/features_validator.py +0 -0
  67. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/format.py +0 -0
  68. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/hash_utils.py +0 -0
  69. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/ip_utils.py +0 -0
  70. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/mstats.py +0 -0
  71. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/one_hot_encoder.py +0 -0
  72. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/phone_utils.py +0 -0
  73. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/postal_code_utils.py +0 -0
  74. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/progress_bar.py +0 -0
  75. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/psi.py +0 -0
  76. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/sample_utils.py +0 -0
  77. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/sklearn_ext.py +0 -0
  78. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/sort.py +0 -0
  79. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/target_utils.py +0 -0
  80. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/track_info.py +0 -0
  81. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/ts_utils.py +0 -0
  82. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/warning_counter.py +0 -0
  83. {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.154a4146.dev9
3
+ Version: 1.2.155.dev1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -34,7 +34,7 @@ Requires-Dist: more-itertools==10.7.0
34
34
  Requires-Dist: numpy<3.0.0,>=1.19.0
35
35
  Requires-Dist: pandas<3.0.0,>=1.1.0
36
36
  Requires-Dist: psutil>=5.9.0
37
- Requires-Dist: pyarrow==18.1.0
37
+ Requires-Dist: pyarrow==23.0.1
38
38
  Requires-Dist: pydantic<3.0.0,>1.0.0
39
39
  Requires-Dist: pyjwt>=2.8.0
40
40
  Requires-Dist: python-bidi==0.4.2
@@ -56,7 +56,7 @@ dependencies = [
56
56
  "psutil>=5.9.0",
57
57
  "category-encoders>=2.8.1",
58
58
  "more_itertools==10.7.0",
59
- "pyarrow==18.1.0",
59
+ "pyarrow==23.0.1",
60
60
  ]
61
61
 
62
62
  [project.urls]
@@ -0,0 +1 @@
1
+ __version__ = "1.2.155.dev1"
@@ -540,7 +540,8 @@ class DataSourcePublisher:
540
540
  )
541
541
 
542
542
  @staticmethod
543
- def build_place_request(data_table_uri: str,
543
+ def build_place_request(
544
+ data_table_uri: str,
544
545
  search_keys: Dict[str, SearchKey],
545
546
  update_frequency: Union[
546
547
  Literal["Daily"], Literal["Weekly"], Literal["Monthly"], Literal["Quarterly"], Literal["Annually"]
@@ -564,7 +565,8 @@ class DataSourcePublisher:
564
565
  force_percentile_generation: Optional[List[str]] = None,
565
566
  _force_generation=False,
566
567
  _silent=False,
567
- ads_hints: Optional[list[AdsHint]] = None) -> Dict:
568
+ ads_hints: Optional[list[AdsHint]] = None,
569
+ ) -> Dict:
568
570
  if data_table_uri is None or not data_table_uri.startswith("bq://"):
569
571
  raise ValidationError(
570
572
  "Unsupported data table uri. It should looks like bq://projectId.datasetId.tableId"
@@ -1772,7 +1772,7 @@ class FeaturesEnricher(TransformerMixin):
1772
1772
  }
1773
1773
  date_column = self._get_date_column(search_keys)
1774
1774
  if date_column:
1775
- cat_features.discard(date_column)
1775
+ cat_features.discard(to_renamed(date_column))
1776
1776
  return sorted(cat_features)
1777
1777
 
1778
1778
  def _resolve_client_features_in_sampled(
@@ -1878,7 +1878,8 @@ class FeaturesEnricher(TransformerMixin):
1878
1878
 
1879
1879
  file_meta = self._search_task.get_file_metadata(self._get_trace_id())
1880
1880
  fit_dropped_features = list(self.fit_dropped_features or file_meta.droppedColumns or [])
1881
- renamed_to_original = {renamed: original for original, renamed in columns_renaming.items()}
1881
+ renamed_to_original = dict(columns_renaming)
1882
+ original_to_renamed = {original: hashed for hashed, original in columns_renaming.items()}
1882
1883
  excluding_search_keys_original = [renamed_to_original.get(sk, sk) for sk in excluding_search_keys]
1883
1884
 
1884
1885
  excluded_client_columns = (
@@ -1886,7 +1887,7 @@ class FeaturesEnricher(TransformerMixin):
1886
1887
  + fit_dropped_features
1887
1888
  + [DateTimeConverter.DATETIME_COL, SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID]
1888
1889
  )
1889
- excluded_client_columns_renamed = {columns_renaming.get(c, c) for c in excluded_client_columns}
1890
+ excluded_client_columns_renamed = {original_to_renamed.get(c, c) for c in excluded_client_columns}
1890
1891
 
1891
1892
  # Client columns for enriched metrics (respects select_features).
1892
1893
  client_features = [
@@ -1937,9 +1938,13 @@ class FeaturesEnricher(TransformerMixin):
1937
1938
  )
1938
1939
 
1939
1940
  fitting_X = X_sorted[baseline_client_features_in_sampled].copy()
1940
- fitting_enriched_X = enriched_X_sorted[
1941
- enriched_client_features_in_sampled + existing_selected_enriched_features
1942
- ].copy()
1941
+ enriched_columns_for_fitting = []
1942
+ seen_enriched_columns: set[str] = set()
1943
+ for column in enriched_client_features_in_sampled + existing_selected_enriched_features:
1944
+ if column not in seen_enriched_columns:
1945
+ enriched_columns_for_fitting.append(column)
1946
+ seen_enriched_columns.add(column)
1947
+ fitting_enriched_X = enriched_X_sorted[enriched_columns_for_fitting].copy()
1943
1948
 
1944
1949
  renamed_generate_features = [columns_renaming.get(c, c) for c in (self.generate_features or [])]
1945
1950
  renamed_client_cat_features = [columns_renaming.get(c, c) for c in (client_cat_features or [])]
@@ -397,12 +397,13 @@ class AddInfo(BaseModel):
397
397
 
398
398
  class AdsHintType(str, Enum):
399
399
  # BW table column must have type `DATE`
400
- DATE_CLUSTER_KEY = "DATE_CLUSTER_KEY" # cluster column also plays role of ADS search key
401
- DATE_CLUSTER_SYNTHETIC = "DATE_CLUSTER_SYNTHETIC" # cluster column used only for partition elimination
400
+ DATE_CLUSTER_KEY = "DATE_CLUSTER_KEY" # cluster column also plays role of ADS search key
401
+ DATE_CLUSTER_SYNTHETIC = "DATE_CLUSTER_SYNTHETIC" # cluster column used only for partition elimination
402
+
402
403
 
403
404
  class AdsHint(BaseModel):
404
405
  adsHintType: AdsHintType
405
406
  hintColumnName: str
406
407
  # must be set if registered ADS defined as view and has clustered column in underlying table
407
408
  # format: myDatasetId.myTableUsedInViewWithClusterField
408
- fullyQualifiedTableName: Optional[str] = None
409
+ fullyQualifiedTableName: Optional[str] = None
@@ -1 +0,0 @@
1
- __version__ = "1.2.154a4146.dev9"