upgini 1.1.312a3__tar.gz → 1.1.312a4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (65):
  1. {upgini-1.1.312a3 → upgini-1.1.312a4}/PKG-INFO +1 -1
  2. upgini-1.1.312a4/src/upgini/__about__.py +1 -0
  3. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/dataset.py +4 -2
  4. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/utils/datetime_utils.py +7 -4
  5. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/utils/phone_utils.py +4 -4
  6. upgini-1.1.312a3/src/upgini/__about__.py +0 -1
  7. {upgini-1.1.312a3 → upgini-1.1.312a4}/.gitignore +0 -0
  8. {upgini-1.1.312a3 → upgini-1.1.312a4}/LICENSE +0 -0
  9. {upgini-1.1.312a3 → upgini-1.1.312a4}/README.md +0 -0
  10. {upgini-1.1.312a3 → upgini-1.1.312a4}/pyproject.toml +0 -0
  11. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/__init__.py +0 -0
  12. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/ads.py +0 -0
  13. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/ads_management/__init__.py +0 -0
  14. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/ads_management/ads_manager.py +0 -0
  15. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/autofe/__init__.py +0 -0
  16. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/autofe/all_operands.py +0 -0
  17. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/autofe/binary.py +0 -0
  18. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/autofe/date.py +0 -0
  19. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/autofe/feature.py +0 -0
  20. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/autofe/groupby.py +0 -0
  21. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/autofe/operand.py +0 -0
  22. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/autofe/unary.py +0 -0
  23. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/autofe/vector.py +0 -0
  24. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/data_source/__init__.py +0 -0
  25. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/data_source/data_source_publisher.py +0 -0
  26. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/errors.py +0 -0
  27. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/features_enricher.py +0 -0
  28. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/http.py +0 -0
  29. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/lazy_import.py +0 -0
  30. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/mdc/__init__.py +0 -0
  31. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/mdc/context.py +0 -0
  32. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/metadata.py +0 -0
  33. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/metrics.py +0 -0
  34. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/normalizer/__init__.py +0 -0
  35. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/normalizer/normalize_utils.py +0 -0
  36. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/resource_bundle/__init__.py +0 -0
  37. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/resource_bundle/exceptions.py +0 -0
  38. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/resource_bundle/strings.properties +0 -0
  39. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  40. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/sampler/__init__.py +0 -0
  41. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/sampler/base.py +0 -0
  42. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/sampler/random_under_sampler.py +0 -0
  43. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/sampler/utils.py +0 -0
  44. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/search_task.py +0 -0
  45. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/spinner.py +0 -0
  46. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/utils/__init__.py +0 -0
  47. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/utils/base_search_key_detector.py +0 -0
  48. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/utils/blocked_time_series.py +0 -0
  49. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/utils/country_utils.py +0 -0
  50. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/utils/custom_loss_utils.py +0 -0
  51. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/utils/cv_utils.py +0 -0
  52. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/utils/deduplicate_utils.py +0 -0
  53. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/utils/display_utils.py +0 -0
  54. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/utils/email_utils.py +0 -0
  55. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/utils/fallback_progress_bar.py +0 -0
  56. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/utils/features_validator.py +0 -0
  57. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/utils/format.py +0 -0
  58. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/utils/ip_utils.py +0 -0
  59. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/utils/postal_code_utils.py +0 -0
  60. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/utils/progress_bar.py +0 -0
  61. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/utils/sklearn_ext.py +0 -0
  62. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/utils/target_utils.py +0 -0
  63. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/utils/track_info.py +0 -0
  64. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/utils/warning_counter.py +0 -0
  65. {upgini-1.1.312a3 → upgini-1.1.312a4}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.1.312a3
3
+ Version: 1.1.312a4
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.1.312a4"
@@ -440,9 +440,11 @@ class Dataset: # (pd.DataFrame):
440
440
  FileColumnMeaningType.DATETIME,
441
441
  # FileColumnMeaningType.IP_ADDRESS,
442
442
  }:
443
+ min_value = self.data[column_name].astype("Int64").min()
444
+ max_value = self.data[column_name].astype("Int64").max()
443
445
  min_max_values = NumericInterval(
444
- minValue=self.data[column_name].astype("Int64").min(),
445
- maxValue=self.data[column_name].astype("Int64").max(),
446
+ minValue=min_value,
447
+ maxValue=max_value,
446
448
  )
447
449
  else:
448
450
  min_max_values = None
@@ -1,6 +1,7 @@
1
1
  import datetime
2
2
  import logging
3
3
  import re
4
+ import pytz
4
5
  from typing import Dict, List, Optional
5
6
 
6
7
  import numpy as np
@@ -28,12 +29,13 @@ DATE_FORMATS = [
28
29
  "%Y-%m-%dT%H:%M:%S.%f",
29
30
  ]
30
31
 
31
- DATETIME_PATTERN = r"^[\d\s\.\-:T/]+$"
32
+ DATETIME_PATTERN = r"^[\d\s\.\-:T/+]+$"
32
33
 
33
34
 
34
35
  class DateTimeSearchKeyConverter:
35
36
  DATETIME_COL = "_date_time"
36
- MIN_SUPPORTED_DATE_TS = datetime.datetime(1999, 12, 31) # 946684800000 # 2000-01-01
37
+ # MIN_SUPPORTED_DATE_TS = datetime.datetime(1999, 12, 31) # 946684800000 # 2000-01-01
38
+ MIN_SUPPORTED_DATE_TS = pd.to_datetime(datetime.datetime(1999, 12, 31)).tz_localize(None)
37
39
 
38
40
  def __init__(
39
41
  self,
@@ -106,12 +108,13 @@ class DateTimeSearchKeyConverter:
106
108
  df[self.date_column] = df[self.date_column].astype("string").apply(self.clean_date)
107
109
  df[self.date_column] = self.parse_date(df)
108
110
 
109
- df = self.clean_old_dates(df)
110
-
111
111
  # If column with date is datetime then extract seconds of the day and minute of the hour
112
112
  # as additional features
113
113
  seconds = "datetime_seconds"
114
114
  df[self.date_column] = df[self.date_column].dt.tz_localize(None)
115
+
116
+ df = self.clean_old_dates(df)
117
+
115
118
  df[seconds] = (df[self.date_column] - df[self.date_column].dt.floor("D")).dt.seconds
116
119
 
117
120
  seconds_without_na = df[seconds].dropna()
@@ -29,21 +29,21 @@ class PhoneSearchKeyConverter:
29
29
  def convert(self, df: pd.DataFrame) -> pd.DataFrame:
30
30
  df = self.phone_to_int(df)
31
31
  if self.country_column is not None:
32
- df = df.apply(self.add_prefix, axis=1)
32
+ df[self.phone_column] = df.apply(self.add_prefix, axis=1)
33
33
  df[self.phone_column] = df[self.phone_column].astype("Int64")
34
34
  return df
35
35
 
36
36
  def add_prefix(self, row):
37
37
  phone = row[self.phone_column]
38
38
  if pd.isna(phone):
39
- return row
39
+ return phone
40
40
  country = row[self.country_column]
41
41
  country_prefix_tuple = self.COUNTRIES_PREFIXES.get(country)
42
42
  if country_prefix_tuple is not None:
43
43
  country_prefix, number_of_digits = country_prefix_tuple
44
44
  if len(str(phone)) == number_of_digits:
45
- row[self.phone_column] = int(country_prefix + str(phone))
46
- return row
45
+ return int(country_prefix + str(phone))
46
+ return phone
47
47
 
48
48
  def phone_to_int(self, df: pd.DataFrame) -> pd.DataFrame:
49
49
  """
@@ -1 +0,0 @@
1
- __version__ = "1.1.312a3"
File without changes
File without changes
File without changes
File without changes
File without changes