upgini 1.1.312a2__tar.gz → 1.1.312a4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic; see the registry's advisory page for more details.

Files changed (65)
  1. {upgini-1.1.312a2 → upgini-1.1.312a4}/PKG-INFO +1 -1
  2. upgini-1.1.312a4/src/upgini/__about__.py +1 -0
  3. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/dataset.py +5 -3
  4. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/features_enricher.py +24 -35
  5. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/utils/datetime_utils.py +7 -4
  6. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/utils/email_utils.py +35 -17
  7. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/utils/phone_utils.py +4 -4
  8. upgini-1.1.312a2/src/upgini/__about__.py +0 -1
  9. {upgini-1.1.312a2 → upgini-1.1.312a4}/.gitignore +0 -0
  10. {upgini-1.1.312a2 → upgini-1.1.312a4}/LICENSE +0 -0
  11. {upgini-1.1.312a2 → upgini-1.1.312a4}/README.md +0 -0
  12. {upgini-1.1.312a2 → upgini-1.1.312a4}/pyproject.toml +0 -0
  13. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/__init__.py +0 -0
  14. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/ads.py +0 -0
  15. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/ads_management/__init__.py +0 -0
  16. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/ads_management/ads_manager.py +0 -0
  17. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/autofe/__init__.py +0 -0
  18. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/autofe/all_operands.py +0 -0
  19. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/autofe/binary.py +0 -0
  20. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/autofe/date.py +0 -0
  21. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/autofe/feature.py +0 -0
  22. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/autofe/groupby.py +0 -0
  23. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/autofe/operand.py +0 -0
  24. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/autofe/unary.py +0 -0
  25. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/autofe/vector.py +0 -0
  26. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/data_source/__init__.py +0 -0
  27. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/data_source/data_source_publisher.py +0 -0
  28. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/errors.py +0 -0
  29. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/http.py +0 -0
  30. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/lazy_import.py +0 -0
  31. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/mdc/__init__.py +0 -0
  32. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/mdc/context.py +0 -0
  33. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/metadata.py +0 -0
  34. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/metrics.py +0 -0
  35. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/normalizer/__init__.py +0 -0
  36. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/normalizer/normalize_utils.py +0 -0
  37. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/resource_bundle/__init__.py +0 -0
  38. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/resource_bundle/exceptions.py +0 -0
  39. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/resource_bundle/strings.properties +0 -0
  40. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  41. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/sampler/__init__.py +0 -0
  42. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/sampler/base.py +0 -0
  43. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/sampler/random_under_sampler.py +0 -0
  44. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/sampler/utils.py +0 -0
  45. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/search_task.py +0 -0
  46. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/spinner.py +0 -0
  47. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/utils/__init__.py +0 -0
  48. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/utils/base_search_key_detector.py +0 -0
  49. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/utils/blocked_time_series.py +0 -0
  50. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/utils/country_utils.py +0 -0
  51. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/utils/custom_loss_utils.py +0 -0
  52. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/utils/cv_utils.py +0 -0
  53. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/utils/deduplicate_utils.py +0 -0
  54. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/utils/display_utils.py +0 -0
  55. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/utils/fallback_progress_bar.py +0 -0
  56. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/utils/features_validator.py +0 -0
  57. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/utils/format.py +0 -0
  58. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/utils/ip_utils.py +0 -0
  59. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/utils/postal_code_utils.py +0 -0
  60. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/utils/progress_bar.py +0 -0
  61. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/utils/sklearn_ext.py +0 -0
  62. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/utils/target_utils.py +0 -0
  63. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/utils/track_info.py +0 -0
  64. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/utils/warning_counter.py +0 -0
  65. {upgini-1.1.312a2 → upgini-1.1.312a4}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.1.312a2
3
+ Version: 1.1.312a4
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.1.312a4"
@@ -302,7 +302,7 @@ class Dataset: # (pd.DataFrame):
302
302
  key
303
303
  for search_group in self.search_keys_checked
304
304
  for key in search_group
305
- if self.columns_renaming.get(key) != EmailSearchKeyConverter.EMAIL_ONE_DOMAIN_COLUMN_NAME
305
+ if not self.columns_renaming.get(key).endswith(EmailSearchKeyConverter.ONE_DOMAIN_SUFFIX)
306
306
  }
307
307
  ipv4_column = self.etalon_def_checked.get(FileColumnMeaningType.IP_ADDRESS.value)
308
308
  if (
@@ -440,9 +440,11 @@ class Dataset: # (pd.DataFrame):
440
440
  FileColumnMeaningType.DATETIME,
441
441
  # FileColumnMeaningType.IP_ADDRESS,
442
442
  }:
443
+ min_value = self.data[column_name].astype("Int64").min()
444
+ max_value = self.data[column_name].astype("Int64").max()
443
445
  min_max_values = NumericInterval(
444
- minValue=self.data[column_name].astype("Int64").min(),
445
- maxValue=self.data[column_name].astype("Int64").max(),
446
+ minValue=min_value,
447
+ maxValue=max_value,
446
448
  )
447
449
  else:
448
450
  min_max_values = None
@@ -91,7 +91,7 @@ from upgini.utils.display_utils import (
91
91
  prepare_and_show_report,
92
92
  show_request_quote_button,
93
93
  )
94
- from upgini.utils.email_utils import EmailSearchKeyConverter, EmailSearchKeyDetector
94
+ from upgini.utils.email_utils import EmailDomainGenerator, EmailSearchKeyConverter, EmailSearchKeyDetector
95
95
  from upgini.utils.features_validator import FeaturesValidator
96
96
  from upgini.utils.format import Format
97
97
  from upgini.utils.ip_utils import IpSearchKeyConverter
@@ -1212,29 +1212,6 @@ class FeaturesEnricher(TransformerMixin):
1212
1212
  def _has_paid_features(self, exclude_features_sources: Optional[List[str]]) -> bool:
1213
1213
  return self._has_features_with_commercial_schema(CommercialSchema.PAID.value, exclude_features_sources)
1214
1214
 
1215
- def _extend_x(self, x: pd.DataFrame, is_demo_dataset: bool) -> Tuple[pd.DataFrame, Dict[str, SearchKey]]:
1216
- search_keys = self.search_keys.copy()
1217
- search_keys = self.__prepare_search_keys(x, search_keys, is_demo_dataset, is_transform=True, silent_mode=True)
1218
-
1219
- extended_X = x.copy()
1220
- generated_features = []
1221
- date_column = SearchKey.find_key(search_keys, [SearchKey.DATE, SearchKey.DATETIME])
1222
- if date_column is not None:
1223
- converter = DateTimeSearchKeyConverter(
1224
- date_column, self.date_format, self.logger, self.bundle, silent_mode=True
1225
- )
1226
- extended_X = converter.convert(extended_X, keep_time=True)
1227
- generated_features.extend(converter.generated_features)
1228
- email_column = self._get_email_column(search_keys)
1229
- hem_column = self._get_hem_column(search_keys)
1230
- if email_column:
1231
- converter = EmailSearchKeyConverter(email_column, hem_column, search_keys, [], self.logger)
1232
- extended_X = converter.convert(extended_X)
1233
- generated_features.extend(converter.generated_features)
1234
- generated_features = [f for f in generated_features if f in self.fit_generated_features]
1235
-
1236
- return extended_X, search_keys
1237
-
1238
1215
  def _is_input_same_as_fit(
1239
1216
  self,
1240
1217
  X: Union[pd.DataFrame, pd.Series, np.ndarray, None] = None,
@@ -1591,6 +1568,12 @@ class FeaturesEnricher(TransformerMixin):
1591
1568
  df = converter.convert(df, keep_time=True)
1592
1569
  generated_features = converter.generated_features
1593
1570
 
1571
+ email_columns = SearchKey.find_all_keys(search_keys, SearchKey.EMAIL)
1572
+ if email_columns:
1573
+ generator = EmailDomainGenerator(email_columns)
1574
+ df = generator.generate(df)
1575
+ generated_features.extend(generator.generated_features)
1576
+
1594
1577
  normalizer = Normalizer(self.search_keys, generated_features, self.bundle, self.logger, self.warning_counter)
1595
1578
  df = normalizer.normalize(df)
1596
1579
  columns_renaming = normalizer.columns_renaming
@@ -1607,13 +1590,6 @@ class FeaturesEnricher(TransformerMixin):
1607
1590
  self.logger.info(f"Downsampling from {num_samples} to {sample_rows}")
1608
1591
  df = df.sample(n=sample_rows, random_state=self.random_state)
1609
1592
 
1610
- email_column = self._get_email_column(search_keys)
1611
- hem_column = self._get_hem_column(search_keys)
1612
- if email_column:
1613
- converter = EmailSearchKeyConverter(
1614
- email_column, hem_column, search_keys, columns_renaming, [], self.bundle, self.logger
1615
- )
1616
- df = converter.convert(df)
1617
1593
  df = self.__add_fit_system_record_id(df, search_keys, SYSTEM_RECORD_ID)
1618
1594
  if DateTimeSearchKeyConverter.DATETIME_COL in df.columns:
1619
1595
  df = df.drop(columns=DateTimeSearchKeyConverter.DATETIME_COL)
@@ -2030,6 +2006,12 @@ class FeaturesEnricher(TransformerMixin):
2030
2006
  if self.add_date_if_missing:
2031
2007
  df = self._add_current_date_as_key(df, search_keys, self.logger, self.bundle)
2032
2008
 
2009
+ email_columns = SearchKey.find_all_keys(search_keys, SearchKey.EMAIL)
2010
+ if email_columns:
2011
+ generator = EmailDomainGenerator(email_columns)
2012
+ df = generator.generate(df)
2013
+ generated_features.extend(generator.generated_features)
2014
+
2033
2015
  normalizer = Normalizer(
2034
2016
  search_keys, generated_features, self.bundle, self.logger, self.warning_counter, silent_mode
2035
2017
  )
@@ -2053,7 +2035,6 @@ class FeaturesEnricher(TransformerMixin):
2053
2035
 
2054
2036
  email_column = self._get_email_column(search_keys)
2055
2037
  hem_column = self._get_hem_column(search_keys)
2056
- # email_converted_to_hem = False
2057
2038
  if email_column:
2058
2039
  converter = EmailSearchKeyConverter(
2059
2040
  email_column,
@@ -2064,7 +2045,6 @@ class FeaturesEnricher(TransformerMixin):
2064
2045
  self.logger,
2065
2046
  )
2066
2047
  df = converter.convert(df)
2067
- generated_features.extend(converter.generated_features)
2068
2048
 
2069
2049
  ip_column = self._get_ip_column(search_keys)
2070
2050
  if ip_column:
@@ -2099,7 +2079,9 @@ class FeaturesEnricher(TransformerMixin):
2099
2079
  for col in features_for_transform:
2100
2080
  meaning_types[col] = FileColumnMeaningType.FEATURE
2101
2081
  features_not_to_pass = [
2102
- c for c in df.columns if c not in search_keys.keys() and c not in features_for_transform and c != ENTITY_SYSTEM_RECORD_ID
2082
+ c
2083
+ for c in df.columns
2084
+ if c not in search_keys.keys() and c not in features_for_transform and c != ENTITY_SYSTEM_RECORD_ID
2103
2085
  ]
2104
2086
 
2105
2087
  if add_fit_system_record_id:
@@ -2446,6 +2428,14 @@ class FeaturesEnricher(TransformerMixin):
2446
2428
  if self.add_date_if_missing:
2447
2429
  df = self._add_current_date_as_key(df, self.fit_search_keys, self.logger, self.bundle)
2448
2430
 
2431
+ email_columns = SearchKey.find_all_keys(self.fit_search_keys, SearchKey.EMAIL)
2432
+ if email_columns:
2433
+ generator = EmailDomainGenerator(
2434
+ email_columns
2435
+ )
2436
+ df = generator.generate(df)
2437
+ self.fit_generated_features.extend(generator.generated_features)
2438
+
2449
2439
  # Checks that need validated date
2450
2440
  validate_dates_distribution(df, self.fit_search_keys, self.logger, self.bundle, self.warning_counter)
2451
2441
 
@@ -2488,7 +2478,6 @@ class FeaturesEnricher(TransformerMixin):
2488
2478
  self.logger,
2489
2479
  )
2490
2480
  df = converter.convert(df)
2491
- self.fit_generated_features.extend(converter.generated_features)
2492
2481
 
2493
2482
  ip_column = self._get_ip_column(self.fit_search_keys)
2494
2483
  if ip_column:
@@ -1,6 +1,7 @@
1
1
  import datetime
2
2
  import logging
3
3
  import re
4
+ import pytz
4
5
  from typing import Dict, List, Optional
5
6
 
6
7
  import numpy as np
@@ -28,12 +29,13 @@ DATE_FORMATS = [
28
29
  "%Y-%m-%dT%H:%M:%S.%f",
29
30
  ]
30
31
 
31
- DATETIME_PATTERN = r"^[\d\s\.\-:T/]+$"
32
+ DATETIME_PATTERN = r"^[\d\s\.\-:T/+]+$"
32
33
 
33
34
 
34
35
  class DateTimeSearchKeyConverter:
35
36
  DATETIME_COL = "_date_time"
36
- MIN_SUPPORTED_DATE_TS = datetime.datetime(1999, 12, 31) # 946684800000 # 2000-01-01
37
+ # MIN_SUPPORTED_DATE_TS = datetime.datetime(1999, 12, 31) # 946684800000 # 2000-01-01
38
+ MIN_SUPPORTED_DATE_TS = pd.to_datetime(datetime.datetime(1999, 12, 31)).tz_localize(None)
37
39
 
38
40
  def __init__(
39
41
  self,
@@ -106,12 +108,13 @@ class DateTimeSearchKeyConverter:
106
108
  df[self.date_column] = df[self.date_column].astype("string").apply(self.clean_date)
107
109
  df[self.date_column] = self.parse_date(df)
108
110
 
109
- df = self.clean_old_dates(df)
110
-
111
111
  # If column with date is datetime then extract seconds of the day and minute of the hour
112
112
  # as additional features
113
113
  seconds = "datetime_seconds"
114
114
  df[self.date_column] = df[self.date_column].dt.tz_localize(None)
115
+
116
+ df = self.clean_old_dates(df)
117
+
115
118
  df[seconds] = (df[self.date_column] - df[self.date_column].dt.floor("D")).dt.seconds
116
119
 
117
120
  seconds_without_na = df[seconds].dropna()
@@ -28,10 +28,31 @@ class EmailSearchKeyDetector(BaseSearchKeyDetector):
28
28
  return is_email_count / all_count > 0.1
29
29
 
30
30
 
31
+ class EmailDomainGenerator:
32
+ DOMAIN_SUFFIX = "_domain"
33
+
34
+ def __init__(self, email_columns: List[str]):
35
+ self.email_columns = email_columns
36
+ self.generated_features = []
37
+
38
+ def generate(self, df: pd.DataFrame) -> pd.DataFrame:
39
+ for email_col in self.email_columns:
40
+ domain_feature = email_col + self.DOMAIN_SUFFIX
41
+ df[domain_feature] = df[email_col].apply(self._email_to_domain)
42
+ self.generated_features.append(domain_feature)
43
+ return df
44
+
45
+ @staticmethod
46
+ def _email_to_domain(email: str) -> Optional[str]:
47
+ if email is not None and isinstance(email, str) and "@" in email:
48
+ name_and_domain = email.split("@")
49
+ if len(name_and_domain) == 2 and len(name_and_domain[1]) > 0:
50
+ return name_and_domain[1]
51
+
52
+
31
53
  class EmailSearchKeyConverter:
32
- HEM_COLUMN_NAME = "hashed_email"
33
- DOMAIN_COLUMN_NAME = "email_domain"
34
- EMAIL_ONE_DOMAIN_COLUMN_NAME = "email_one_domain"
54
+ HEM_SUFFIX = "_hem"
55
+ ONE_DOMAIN_SUFFIX = "_one_domain"
35
56
 
36
57
  def __init__(
37
58
  self,
@@ -54,7 +75,6 @@ class EmailSearchKeyConverter:
54
75
  else:
55
76
  self.logger = logging.getLogger()
56
77
  self.logger.setLevel("FATAL")
57
- self.generated_features: List[str] = []
58
78
  self.email_converted_to_hem = False
59
79
 
60
80
  @staticmethod
@@ -78,18 +98,19 @@ class EmailSearchKeyConverter:
78
98
  df = df.copy()
79
99
  original_email_column = self.columns_renaming[self.email_column]
80
100
  if self.hem_column is None:
81
- df[self.HEM_COLUMN_NAME] = df[self.email_column].apply(self._email_to_hem)
82
- if df[self.HEM_COLUMN_NAME].isna().all():
101
+ hem_name = self.email_column + self.HEM_SUFFIX
102
+ df[hem_name] = df[self.email_column].apply(self._email_to_hem)
103
+ if df[hem_name].isna().all():
83
104
  msg = self.bundle.get("all_emails_invalid").format(self.email_column)
84
105
  print(msg)
85
106
  self.logger.warning(msg)
86
- df = df.drop(columns=self.HEM_COLUMN_NAME)
107
+ df = df.drop(columns=hem_name)
87
108
  del self.search_keys[self.email_column]
88
109
  return df
89
- self.search_keys[self.HEM_COLUMN_NAME] = SearchKey.HEM
110
+ self.search_keys[hem_name] = SearchKey.HEM
90
111
  if self.email_column in self.unnest_search_keys:
91
- self.unnest_search_keys.append(self.HEM_COLUMN_NAME)
92
- self.columns_renaming[self.HEM_COLUMN_NAME] = original_email_column # it could be upgini_email_unnest...
112
+ self.unnest_search_keys.append(hem_name)
113
+ self.columns_renaming[hem_name] = original_email_column # it could be upgini_email_unnest...
93
114
  self.email_converted_to_hem = True
94
115
  else:
95
116
  df[self.hem_column] = df[self.hem_column].astype("string").str.lower()
@@ -98,16 +119,13 @@ class EmailSearchKeyConverter:
98
119
  if self.email_column in self.unnest_search_keys:
99
120
  self.unnest_search_keys.remove(self.email_column)
100
121
 
101
- df[self.EMAIL_ONE_DOMAIN_COLUMN_NAME] = df[self.email_column].apply(self._email_to_one_domain)
102
- self.columns_renaming[self.EMAIL_ONE_DOMAIN_COLUMN_NAME] = original_email_column
103
- self.search_keys[self.EMAIL_ONE_DOMAIN_COLUMN_NAME] = SearchKey.EMAIL_ONE_DOMAIN
122
+ one_domain_name = self.email_column + self.ONE_DOMAIN_SUFFIX
123
+ df[one_domain_name] = df[self.email_column].apply(self._email_to_one_domain)
124
+ self.columns_renaming[one_domain_name] = original_email_column
125
+ self.search_keys[one_domain_name] = SearchKey.EMAIL_ONE_DOMAIN
104
126
 
105
127
  if self.email_converted_to_hem:
106
128
  df = df.drop(columns=self.email_column)
107
129
  del self.columns_renaming[self.email_column]
108
130
 
109
- df[self.DOMAIN_COLUMN_NAME] = df[self.EMAIL_ONE_DOMAIN_COLUMN_NAME].str[1:]
110
- self.generated_features.append(self.DOMAIN_COLUMN_NAME)
111
- self.columns_renaming[self.DOMAIN_COLUMN_NAME] = original_email_column
112
-
113
131
  return df
@@ -29,21 +29,21 @@ class PhoneSearchKeyConverter:
29
29
  def convert(self, df: pd.DataFrame) -> pd.DataFrame:
30
30
  df = self.phone_to_int(df)
31
31
  if self.country_column is not None:
32
- df = df.apply(self.add_prefix, axis=1)
32
+ df[self.phone_column] = df.apply(self.add_prefix, axis=1)
33
33
  df[self.phone_column] = df[self.phone_column].astype("Int64")
34
34
  return df
35
35
 
36
36
  def add_prefix(self, row):
37
37
  phone = row[self.phone_column]
38
38
  if pd.isna(phone):
39
- return row
39
+ return phone
40
40
  country = row[self.country_column]
41
41
  country_prefix_tuple = self.COUNTRIES_PREFIXES.get(country)
42
42
  if country_prefix_tuple is not None:
43
43
  country_prefix, number_of_digits = country_prefix_tuple
44
44
  if len(str(phone)) == number_of_digits:
45
- row[self.phone_column] = int(country_prefix + str(phone))
46
- return row
45
+ return int(country_prefix + str(phone))
46
+ return phone
47
47
 
48
48
  def phone_to_int(self, df: pd.DataFrame) -> pd.DataFrame:
49
49
  """
@@ -1 +0,0 @@
1
- __version__ = "1.1.312a2"
File without changes
File without changes
File without changes
File without changes
File without changes