upgini 1.1.306__tar.gz → 1.1.307__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (66)
  1. {upgini-1.1.306 → upgini-1.1.307}/PKG-INFO +1 -1
  2. upgini-1.1.307/src/upgini/__about__.py +1 -0
  3. upgini-1.1.307/src/upgini/__init__.py +11 -0
  4. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/features_enricher.py +18 -16
  5. upgini-1.1.306/src/upgini/__about__.py +0 -1
  6. upgini-1.1.306/src/upgini/__init__.py +0 -7
  7. {upgini-1.1.306 → upgini-1.1.307}/.gitignore +0 -0
  8. {upgini-1.1.306 → upgini-1.1.307}/LICENSE +0 -0
  9. {upgini-1.1.306 → upgini-1.1.307}/README.md +0 -0
  10. {upgini-1.1.306 → upgini-1.1.307}/pyproject.toml +0 -0
  11. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/ads.py +0 -0
  12. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/ads_management/__init__.py +0 -0
  13. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/ads_management/ads_manager.py +0 -0
  14. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/autofe/__init__.py +0 -0
  15. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/autofe/all_operands.py +0 -0
  16. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/autofe/binary.py +0 -0
  17. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/autofe/date.py +0 -0
  18. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/autofe/feature.py +0 -0
  19. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/autofe/groupby.py +0 -0
  20. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/autofe/operand.py +0 -0
  21. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/autofe/unary.py +0 -0
  22. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/autofe/vector.py +0 -0
  23. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/data_source/__init__.py +0 -0
  24. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/data_source/data_source_publisher.py +0 -0
  25. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/dataset.py +0 -0
  26. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/errors.py +0 -0
  27. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/http.py +0 -0
  28. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/lazy_import.py +0 -0
  29. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/mdc/__init__.py +0 -0
  30. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/mdc/context.py +0 -0
  31. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/metadata.py +0 -0
  32. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/metrics.py +0 -0
  33. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/normalizer/__init__.py +0 -0
  34. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/normalizer/phone_normalizer.py +0 -0
  35. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/resource_bundle/__init__.py +0 -0
  36. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/resource_bundle/exceptions.py +0 -0
  37. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/resource_bundle/strings.properties +0 -0
  38. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  39. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/sampler/__init__.py +0 -0
  40. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/sampler/base.py +0 -0
  41. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/sampler/random_under_sampler.py +0 -0
  42. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/sampler/utils.py +0 -0
  43. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/search_task.py +0 -0
  44. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/spinner.py +0 -0
  45. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/utils/__init__.py +0 -0
  46. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/utils/base_search_key_detector.py +0 -0
  47. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/utils/blocked_time_series.py +0 -0
  48. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/utils/country_utils.py +0 -0
  49. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/utils/custom_loss_utils.py +0 -0
  50. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/utils/cv_utils.py +0 -0
  51. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/utils/datetime_utils.py +0 -0
  52. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/utils/deduplicate_utils.py +0 -0
  53. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/utils/display_utils.py +0 -0
  54. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/utils/email_utils.py +0 -0
  55. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/utils/fallback_progress_bar.py +0 -0
  56. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/utils/features_validator.py +0 -0
  57. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/utils/format.py +0 -0
  58. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/utils/ip_utils.py +0 -0
  59. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/utils/phone_utils.py +0 -0
  60. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/utils/postal_code_utils.py +0 -0
  61. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/utils/progress_bar.py +0 -0
  62. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/utils/sklearn_ext.py +0 -0
  63. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/utils/target_utils.py +0 -0
  64. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/utils/track_info.py +0 -0
  65. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/utils/warning_counter.py +0 -0
  66. {upgini-1.1.306 → upgini-1.1.307}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.1.306
3
+ Version: 1.1.307
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.1.307"
@@ -0,0 +1,11 @@
1
+ import os
2
+
3
+ from .lazy_import import LazyImport
4
+
5
+ os.environ["SETUPTOOLS_USE_DISTUTILS"] = "stdlib"
6
+
7
+ FeaturesEnricher = LazyImport("upgini.features_enricher", "FeaturesEnricher")
8
+ SearchKey = LazyImport("upgini.metadata", "SearchKey")
9
+ RuntimeParameters = LazyImport("upgini.metadata", "RuntimeParameters")
10
+ CVType = LazyImport("upgini.metadata", "CVType")
11
+ ModelTaskType = LazyImport("upgini.metadata", "ModelTaskType")
@@ -2300,8 +2300,6 @@ class FeaturesEnricher(TransformerMixin):
2300
2300
  self.fit_search_keys = self.search_keys.copy()
2301
2301
  self.fit_search_keys = self.__prepare_search_keys(validated_X, self.fit_search_keys, is_demo_dataset)
2302
2302
 
2303
- validate_dates_distribution(validated_X, self.fit_search_keys, self.logger, self.bundle, self.warning_counter)
2304
-
2305
2303
  maybe_date_column = self._get_date_column(self.fit_search_keys)
2306
2304
  has_date = maybe_date_column is not None
2307
2305
  model_task_type = self.model_task_type or define_task(validated_y, has_date, self.logger)
@@ -2322,9 +2320,6 @@ class FeaturesEnricher(TransformerMixin):
2322
2320
 
2323
2321
  df = self.__handle_index_search_keys(df, self.fit_search_keys)
2324
2322
 
2325
- if is_numeric_dtype(df[self.TARGET_NAME]) and has_date:
2326
- self._validate_PSI(df.sort_values(by=maybe_date_column))
2327
-
2328
2323
  if DEFAULT_INDEX in df.columns:
2329
2324
  msg = self.bundle.get("unsupported_index_column")
2330
2325
  self.logger.info(msg)
@@ -2334,26 +2329,33 @@ class FeaturesEnricher(TransformerMixin):
2334
2329
 
2335
2330
  df = self.__add_country_code(df, self.fit_search_keys)
2336
2331
 
2337
- df = remove_fintech_duplicates(
2338
- df, self.fit_search_keys, date_format=self.date_format, logger=self.logger, bundle=self.bundle
2339
- )
2340
- df = clean_full_duplicates(df, self.logger, bundle=self.bundle)
2341
-
2342
- date_column = self._get_date_column(self.fit_search_keys)
2343
- self.__adjust_cv(df, date_column, model_task_type)
2344
-
2345
2332
  self.fit_generated_features = []
2346
2333
 
2347
- if date_column is not None:
2348
- converter = DateTimeSearchKeyConverter(date_column, self.date_format, self.logger, bundle=self.bundle)
2334
+ if has_date:
2335
+ converter = DateTimeSearchKeyConverter(maybe_date_column, self.date_format, self.logger, bundle=self.bundle)
2349
2336
  df = converter.convert(df, keep_time=True)
2350
- self.logger.info(f"Date column after convertion: {df[date_column]}")
2337
+ self.logger.info(f"Date column after convertion: {df[maybe_date_column]}")
2351
2338
  self.fit_generated_features.extend(converter.generated_features)
2352
2339
  else:
2353
2340
  self.logger.info("Input dataset hasn't date column")
2354
2341
  if self.add_date_if_missing:
2355
2342
  df = self._add_current_date_as_key(df, self.fit_search_keys, self.logger, self.bundle)
2356
2343
 
2344
+ # Checks that need validated date
2345
+ validate_dates_distribution(validated_X, self.fit_search_keys, self.logger, self.bundle, self.warning_counter)
2346
+
2347
+ if is_numeric_dtype(df[self.TARGET_NAME]) and has_date:
2348
+ self._validate_PSI(df.sort_values(by=maybe_date_column))
2349
+
2350
+ self.__adjust_cv(df, maybe_date_column, model_task_type)
2351
+
2352
+ # TODO normalize and convert all columns
2353
+
2354
+ df = remove_fintech_duplicates(
2355
+ df, self.fit_search_keys, date_format=self.date_format, logger=self.logger, bundle=self.bundle
2356
+ )
2357
+ df = clean_full_duplicates(df, self.logger, bundle=self.bundle)
2358
+
2357
2359
  if (
2358
2360
  self.detect_missing_search_keys
2359
2361
  and list(self.fit_search_keys.values()) == [SearchKey.DATE]
@@ -1 +0,0 @@
1
- __version__ = "1.1.306"
@@ -1,7 +0,0 @@
1
- from .lazy_import import LazyImport
2
-
3
- FeaturesEnricher = LazyImport('upgini.features_enricher', 'FeaturesEnricher')
4
- SearchKey = LazyImport('upgini.metadata', 'SearchKey')
5
- RuntimeParameters = LazyImport('upgini.metadata', 'RuntimeParameters')
6
- CVType = LazyImport('upgini.metadata', 'CVType')
7
- ModelTaskType = LazyImport('upgini.metadata', 'ModelTaskType')
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes