upgini 1.2.3__tar.gz → 1.2.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (66) hide show
  1. {upgini-1.2.3 → upgini-1.2.5}/PKG-INFO +22 -9
  2. {upgini-1.2.3 → upgini-1.2.5}/README.md +21 -8
  3. upgini-1.2.5/src/upgini/__about__.py +1 -0
  4. upgini-1.2.5/src/upgini/__init__.py +13 -0
  5. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/autofe/unary.py +4 -2
  6. upgini-1.2.3/src/upgini/__about__.py +0 -1
  7. upgini-1.2.3/src/upgini/__init__.py +0 -11
  8. {upgini-1.2.3 → upgini-1.2.5}/.gitignore +0 -0
  9. {upgini-1.2.3 → upgini-1.2.5}/LICENSE +0 -0
  10. {upgini-1.2.3 → upgini-1.2.5}/pyproject.toml +0 -0
  11. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/ads.py +0 -0
  12. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/ads_management/__init__.py +0 -0
  13. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/ads_management/ads_manager.py +0 -0
  14. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/autofe/__init__.py +0 -0
  15. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/autofe/all_operands.py +0 -0
  16. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/autofe/binary.py +0 -0
  17. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/autofe/date.py +0 -0
  18. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/autofe/feature.py +0 -0
  19. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/autofe/groupby.py +0 -0
  20. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/autofe/operand.py +0 -0
  21. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/autofe/vector.py +0 -0
  22. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/data_source/__init__.py +0 -0
  23. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/data_source/data_source_publisher.py +0 -0
  24. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/dataset.py +0 -0
  25. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/errors.py +0 -0
  26. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/features_enricher.py +0 -0
  27. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/http.py +0 -0
  28. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/lazy_import.py +0 -0
  29. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/mdc/__init__.py +0 -0
  30. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/mdc/context.py +0 -0
  31. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/metadata.py +0 -0
  32. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/metrics.py +0 -0
  33. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/normalizer/__init__.py +0 -0
  34. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/normalizer/normalize_utils.py +0 -0
  35. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/resource_bundle/__init__.py +0 -0
  36. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/resource_bundle/exceptions.py +0 -0
  37. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/resource_bundle/strings.properties +0 -0
  38. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  39. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/sampler/__init__.py +0 -0
  40. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/sampler/base.py +0 -0
  41. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/sampler/random_under_sampler.py +0 -0
  42. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/sampler/utils.py +0 -0
  43. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/search_task.py +0 -0
  44. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/spinner.py +0 -0
  45. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/utils/__init__.py +0 -0
  46. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/utils/base_search_key_detector.py +0 -0
  47. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/utils/blocked_time_series.py +0 -0
  48. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/utils/country_utils.py +0 -0
  49. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/utils/custom_loss_utils.py +0 -0
  50. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/utils/cv_utils.py +0 -0
  51. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/utils/datetime_utils.py +0 -0
  52. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/utils/deduplicate_utils.py +0 -0
  53. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/utils/display_utils.py +0 -0
  54. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/utils/email_utils.py +0 -0
  55. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/utils/fallback_progress_bar.py +0 -0
  56. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/utils/features_validator.py +0 -0
  57. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/utils/format.py +0 -0
  58. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/utils/ip_utils.py +0 -0
  59. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/utils/phone_utils.py +0 -0
  60. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/utils/postal_code_utils.py +0 -0
  61. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/utils/progress_bar.py +0 -0
  62. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/utils/sklearn_ext.py +0 -0
  63. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/utils/target_utils.py +0 -0
  64. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/utils/track_info.py +0 -0
  65. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/utils/warning_counter.py +0 -0
  66. {upgini-1.2.3 → upgini-1.2.5}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.3
3
+ Version: 1.2.5
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -260,7 +260,9 @@ We do dataset verification and cleaning under the hood, but still there are some
260
260
  *Search keys* columns will be used to match records from all potential external data sources / features.
261
261
  Define one or more columns as search keys when initializing the `FeaturesEnricher` class.
262
262
  ```python
263
- from upgini import FeaturesEnricher, SearchKey
263
+ from upgini.features_enricher import FeaturesEnricher
264
+ from upgini.metadata import SearchKey
265
+
264
266
  enricher = FeaturesEnricher(
265
267
  search_keys={
266
268
  "subscription_activation_date": SearchKey.DATE,
@@ -346,7 +348,9 @@ enricher = FeaturesEnricher(
346
348
 
347
349
  For the meaning types <tt>SearchKey.DATE</tt>/<tt>SearchKey.DATETIME</tt> with dtypes <tt>object</tt> or <tt>string</tt> you have to specify the date/datetime format by passing the <tt>date_format</tt> parameter to `FeaturesEnricher`. For example:
348
350
  ```python
349
- from upgini import FeaturesEnricher, SearchKey
351
+ from upgini.features_enricher import FeaturesEnricher
352
+ from upgini.metadata import SearchKey
353
+
350
354
  enricher = FeaturesEnricher(
351
355
  search_keys={
352
356
  "subscription_activation_date": SearchKey.DATE,
@@ -367,7 +371,9 @@ df["date"] = df.date.astype("datetime64").dt.tz_localize("Europe/Warsaw")
367
371
 
368
372
  A single country for the whole training dataset can be passed with the `country_code` parameter:
369
373
  ```python
370
- from upgini import FeaturesEnricher, SearchKey
374
+ from upgini.features_enricher import FeaturesEnricher
375
+ from upgini.metadata import SearchKey
376
+
371
377
  enricher = FeaturesEnricher(
372
378
  search_keys={
373
379
  "subscription_activation_date": SearchKey.DATE,
@@ -386,7 +392,8 @@ Create instance of the `FeaturesEnricher` class and call:
386
392
  Let's try it out!
387
393
  ```python
388
394
  import pandas as pd
389
- from upgini import FeaturesEnricher, SearchKey
395
+ from upgini.features_enricher import FeaturesEnricher
396
+ from upgini.metadata import SearchKey
390
397
 
391
398
  # load labeled training dataset to initiate search
392
399
  train_df = pd.read_csv("customer_churn_prediction_train.csv")
@@ -477,7 +484,9 @@ We detect ML task under the hood based on label column values. Currently we supp
477
484
 
478
485
  But for certain search datasets you can pass a parameter to `FeaturesEnricher` with the correct ML task type:
479
486
  ```python
480
- from upgini import ModelTaskType
487
+ from upgini.features_enricher import FeaturesEnricher
488
+ from upgini.metadata import SearchKey, ModelTaskType
489
+
481
490
  enricher = FeaturesEnricher(
482
491
  search_keys={"subscription_activation_date": SearchKey.DATE},
483
492
  model_task_type=ModelTaskType.REGRESSION
@@ -490,7 +499,9 @@ enricher = FeaturesEnricher(
490
499
 
491
500
  To initiate feature search you can pass cross-validation type parameter to `FeaturesEnricher` with time series specific CV type:
492
501
  ```python
493
- from upgini.metadata import CVType
502
+ from upgini.features_enricher import FeaturesEnricher
503
+ from upgini.metadata import SearchKey, CVType
504
+
494
505
  enricher = FeaturesEnricher(
495
506
  search_keys={"sales_date": SearchKey.DATE},
496
507
  cv=CVType.time_series
@@ -624,7 +635,9 @@ But you can easily define new split by passing child of BaseCrossValidator to pa
624
635
 
625
636
  Example with more tips-and-tricks:
626
637
  ```python
627
- from upgini import FeaturesEnricher, SearchKey
638
+ from upgini.features_enricher import FeaturesEnricher
639
+ from upgini.metadata import SearchKey
640
+
628
641
  enricher = FeaturesEnricher(search_keys={"registration_date": SearchKey.DATE})
629
642
 
630
643
  # Fit with default setup for metrics calculation
@@ -797,7 +810,7 @@ You may publish ANY data which you consider as royalty / license free ([Open Dat
797
810
  2. Copy *Upgini API key* from profile and upload your data from Upgini python library with this key:
798
811
  ```python
799
812
  import pandas as pd
800
- from upgini import SearchKey
813
+ from upgini.metadata import SearchKey
801
814
  from upgini.ads import upload_user_ads
802
815
  import os
803
816
  os.environ["UPGINI_API_KEY"] = "your_long_string_api_key_goes_here"
@@ -217,7 +217,9 @@ We do dataset verification and cleaning under the hood, but still there are some
217
217
  *Search keys* columns will be used to match records from all potential external data sources / features.
218
218
  Define one or more columns as search keys when initializing the `FeaturesEnricher` class.
219
219
  ```python
220
- from upgini import FeaturesEnricher, SearchKey
220
+ from upgini.features_enricher import FeaturesEnricher
221
+ from upgini.metadata import SearchKey
222
+
221
223
  enricher = FeaturesEnricher(
222
224
  search_keys={
223
225
  "subscription_activation_date": SearchKey.DATE,
@@ -303,7 +305,9 @@ enricher = FeaturesEnricher(
303
305
 
304
306
  For the meaning types <tt>SearchKey.DATE</tt>/<tt>SearchKey.DATETIME</tt> with dtypes <tt>object</tt> or <tt>string</tt> you have to specify the date/datetime format by passing the <tt>date_format</tt> parameter to `FeaturesEnricher`. For example:
305
307
  ```python
306
- from upgini import FeaturesEnricher, SearchKey
308
+ from upgini.features_enricher import FeaturesEnricher
309
+ from upgini.metadata import SearchKey
310
+
307
311
  enricher = FeaturesEnricher(
308
312
  search_keys={
309
313
  "subscription_activation_date": SearchKey.DATE,
@@ -324,7 +328,9 @@ df["date"] = df.date.astype("datetime64").dt.tz_localize("Europe/Warsaw")
324
328
 
325
329
  A single country for the whole training dataset can be passed with the `country_code` parameter:
326
330
  ```python
327
- from upgini import FeaturesEnricher, SearchKey
331
+ from upgini.features_enricher import FeaturesEnricher
332
+ from upgini.metadata import SearchKey
333
+
328
334
  enricher = FeaturesEnricher(
329
335
  search_keys={
330
336
  "subscription_activation_date": SearchKey.DATE,
@@ -343,7 +349,8 @@ Create instance of the `FeaturesEnricher` class and call:
343
349
  Let's try it out!
344
350
  ```python
345
351
  import pandas as pd
346
- from upgini import FeaturesEnricher, SearchKey
352
+ from upgini.features_enricher import FeaturesEnricher
353
+ from upgini.metadata import SearchKey
347
354
 
348
355
  # load labeled training dataset to initiate search
349
356
  train_df = pd.read_csv("customer_churn_prediction_train.csv")
@@ -434,7 +441,9 @@ We detect ML task under the hood based on label column values. Currently we supp
434
441
 
435
442
  But for certain search datasets you can pass a parameter to `FeaturesEnricher` with the correct ML task type:
436
443
  ```python
437
- from upgini import ModelTaskType
444
+ from upgini.features_enricher import FeaturesEnricher
445
+ from upgini.metadata import SearchKey, ModelTaskType
446
+
438
447
  enricher = FeaturesEnricher(
439
448
  search_keys={"subscription_activation_date": SearchKey.DATE},
440
449
  model_task_type=ModelTaskType.REGRESSION
@@ -447,7 +456,9 @@ enricher = FeaturesEnricher(
447
456
 
448
457
  To initiate feature search you can pass cross-validation type parameter to `FeaturesEnricher` with time series specific CV type:
449
458
  ```python
450
- from upgini.metadata import CVType
459
+ from upgini.features_enricher import FeaturesEnricher
460
+ from upgini.metadata import SearchKey, CVType
461
+
451
462
  enricher = FeaturesEnricher(
452
463
  search_keys={"sales_date": SearchKey.DATE},
453
464
  cv=CVType.time_series
@@ -581,7 +592,9 @@ But you can easily define new split by passing child of BaseCrossValidator to pa
581
592
 
582
593
  Example with more tips-and-tricks:
583
594
  ```python
584
- from upgini import FeaturesEnricher, SearchKey
595
+ from upgini.features_enricher import FeaturesEnricher
596
+ from upgini.metadata import SearchKey
597
+
585
598
  enricher = FeaturesEnricher(search_keys={"registration_date": SearchKey.DATE})
586
599
 
587
600
  # Fit with default setup for metrics calculation
@@ -754,7 +767,7 @@ You may publish ANY data which you consider as royalty / license free ([Open Dat
754
767
  2. Copy *Upgini API key* from profile and upload your data from Upgini python library with this key:
755
768
  ```python
756
769
  import pandas as pd
757
- from upgini import SearchKey
770
+ from upgini.metadata import SearchKey
758
771
  from upgini.ads import upload_user_ads
759
772
  import os
760
773
  os.environ["UPGINI_API_KEY"] = "your_long_string_api_key_goes_here"
@@ -0,0 +1 @@
1
+ __version__ = "1.2.5"
@@ -0,0 +1,13 @@
1
+ import os
2
+
3
+ from upgini.features_enricher import FeaturesEnricher # noqa: F401
4
+ from upgini.metadata import SearchKey, CVType, RuntimeParameters, ModelTaskType # noqa: F401
5
+ # from .lazy_import import LazyImport
6
+
7
+ os.environ["SETUPTOOLS_USE_DISTUTILS"] = "stdlib"
8
+
9
+ # FeaturesEnricher = LazyImport("upgini.features_enricher", "FeaturesEnricher")
10
+ # SearchKey = LazyImport("upgini.metadata", "SearchKey")
11
+ # RuntimeParameters = LazyImport("upgini.metadata", "RuntimeParameters")
12
+ # CVType = LazyImport("upgini.metadata", "CVType")
13
+ # ModelTaskType = LazyImport("upgini.metadata", "ModelTaskType")
@@ -12,10 +12,12 @@ class Abs(PandasOperand, VectorizableMixin):
12
12
  group_index: int = 0
13
13
 
14
14
  def calculate_unary(self, data: pd.Series) -> pd.Series:
15
- return data.abs()
15
+ return data.astype(np.float64).abs()
16
+ # return data.abs()
16
17
 
17
18
  def calculate_group(self, data: pd.DataFrame, **kwargs) -> pd.DataFrame:
18
- return data.abs()
19
+ return data.astype(np.float64).abs()
20
+ # return data.abs()
19
21
 
20
22
 
21
23
  class Log(PandasOperand, VectorizableMixin):
@@ -1 +0,0 @@
1
- __version__ = "1.2.3"
@@ -1,11 +0,0 @@
1
- import os
2
-
3
- from .lazy_import import LazyImport
4
-
5
- os.environ["SETUPTOOLS_USE_DISTUTILS"] = "stdlib"
6
-
7
- FeaturesEnricher = LazyImport("upgini.features_enricher", "FeaturesEnricher")
8
- SearchKey = LazyImport("upgini.metadata", "SearchKey")
9
- RuntimeParameters = LazyImport("upgini.metadata", "RuntimeParameters")
10
- CVType = LazyImport("upgini.metadata", "CVType")
11
- ModelTaskType = LazyImport("upgini.metadata", "ModelTaskType")
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes