upgini 1.2.133a1__py3-none-any.whl → 1.2.134__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.133a1"
1
+ __version__ = "1.2.134"
@@ -2938,7 +2938,7 @@ if response.status_code == 200:
2938
2938
  new_columns_on_transform = [c for c in validated_Xy.columns if c not in fit_input_columns]
2939
2939
 
2940
2940
  selected_generated_features = [
2941
- c for c in generated_features if not self.fit_select_features or c in self.feature_names_
2941
+ c for c in generated_features if c in self.feature_names_
2942
2942
  ]
2943
2943
  if keep_input is True:
2944
2944
  selected_input_columns = [
@@ -3245,7 +3245,7 @@ if response.status_code == 200:
3245
3245
  if fintech_warnings:
3246
3246
  for fintech_warning in fintech_warnings:
3247
3247
  self.__log_warning(fintech_warning)
3248
- df, full_duplicates_warning = clean_full_duplicates(df, self.logger, bundle=self.bundle)
3248
+ df, full_duplicates_warning = clean_full_duplicates(df, logger=self.logger, bundle=self.bundle)
3249
3249
  if full_duplicates_warning:
3250
3250
  if len(df) == 0:
3251
3251
  raise ValidationError(full_duplicates_warning)
@@ -84,30 +84,31 @@ class DateTimeConverter:
84
84
  return parsed is not None and not parsed.isna().all()
85
85
 
86
86
  def parse_datetime(self, df: pd.DataFrame, raise_errors=True) -> pd.Series | None:
87
- df = df.copy()
88
87
  if len(df) == 0 or df[self.date_column].isna().all():
89
88
  return None
90
89
 
90
+ date_col = df[self.date_column].copy()
91
+
91
92
  try:
92
- if df[self.date_column].apply(lambda x: isinstance(x, datetime.datetime)).all():
93
- parsed_datetime = df[self.date_column].apply(lambda x: x.replace(tzinfo=None))
94
- elif isinstance(df[self.date_column].dropna().values[0], datetime.date):
95
- parsed_datetime = pd.to_datetime(df[self.date_column], errors="coerce")
96
- elif isinstance(df[self.date_column].dtype, pd.PeriodDtype):
97
- parsed_datetime = df[self.date_column].dt.to_timestamp()
98
- elif is_numeric_dtype(df[self.date_column]):
93
+ if date_col.apply(lambda x: isinstance(x, datetime.datetime)).all():
94
+ parsed_datetime = date_col.apply(lambda x: x.replace(tzinfo=None))
95
+ elif isinstance(date_col.dropna().values[0], datetime.date):
96
+ parsed_datetime = pd.to_datetime(date_col, errors="coerce")
97
+ elif isinstance(date_col.dtype, pd.PeriodDtype):
98
+ parsed_datetime = date_col.dt.to_timestamp()
99
+ elif is_numeric_dtype(date_col):
99
100
  # 315532801 - 2524608001 - seconds
100
101
  # 315532801000 - 2524608001000 - milliseconds
101
102
  # 315532801000000 - 2524608001000000 - microseconds
102
103
  # 315532801000000000 - 2524608001000000000 - nanoseconds
103
- if df[self.date_column].apply(lambda x: 10**16 < x).all():
104
- parsed_datetime = pd.to_datetime(df[self.date_column], unit="ns")
105
- elif df[self.date_column].apply(lambda x: 10**14 < x < 10**16).all():
106
- parsed_datetime = pd.to_datetime(df[self.date_column], unit="us")
107
- elif df[self.date_column].apply(lambda x: 10**11 < x < 10**14).all():
108
- parsed_datetime = pd.to_datetime(df[self.date_column], unit="ms")
109
- elif df[self.date_column].apply(lambda x: 10**8 < x < 10**11).all():
110
- parsed_datetime = pd.to_datetime(df[self.date_column], unit="s")
104
+ if date_col.apply(lambda x: 10**16 < x).all():
105
+ parsed_datetime = pd.to_datetime(date_col, unit="ns")
106
+ elif date_col.apply(lambda x: 10**14 < x < 10**16).all():
107
+ parsed_datetime = pd.to_datetime(date_col, unit="us")
108
+ elif date_col.apply(lambda x: 10**11 < x < 10**14).all():
109
+ parsed_datetime = pd.to_datetime(date_col, unit="ms")
110
+ elif date_col.apply(lambda x: 10**8 < x < 10**11).all():
111
+ parsed_datetime = pd.to_datetime(date_col, unit="s")
111
112
  else:
112
113
  msg = self.bundle.get("unsupported_date_type").format(self.date_column)
113
114
  if raise_errors:
@@ -115,8 +116,10 @@ class DateTimeConverter:
115
116
  else:
116
117
  return None
117
118
  else:
118
- df[self.date_column] = df[self.date_column].astype("string").apply(self.clean_date)
119
- parsed_datetime = self.parse_string_date(df, raise_errors)
119
+ date_col = date_col.astype("string") # .apply(self.clean_date)
120
+ parsed_datetime = self.parse_string_date(date_col.to_frame(self.date_column), raise_errors)
121
+ if parsed_datetime.isna().all():
122
+ raise ValidationError(self.bundle.get("invalid_date_format").format(self.date_column))
120
123
  parsed_datetime = parsed_datetime.dt.tz_localize(None)
121
124
  return parsed_datetime
122
125
  except Exception as e:
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.4
2
2
  Name: upgini
3
- Version: 1.2.133a1
3
+ Version: 1.2.134
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,9 +1,9 @@
1
- upgini/__about__.py,sha256=2J0xRzQRVTKW9-UjHayhhp4WFFpAteaH5RVfrXavaz0,26
1
+ upgini/__about__.py,sha256=MT6QELtgqY26TnKAbjjKik1_WtJtBfrKgI7IxYRg6F0,24
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=Nm2ZmwyQqvTnymYpGUwyJWy7y2ebXlHMyYmGeGcyA_s,31652
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=Na-W1f5xQVUKF4_m2Bw8mM29kLD8N3U7p9-FU0J9bi8,234415
6
+ upgini/features_enricher.py,sha256=lnog1Ox6bE6ADBBwLk63w6VNOV_rnQfagi8CnpuPBbU,234390
7
7
  upgini/http.py,sha256=-J_wOpnwVnT0ebPC6sOs6fN3AWtCD0LJLu6nlYmxaqk,44348
8
8
  upgini/metadata.py,sha256=H3wiN37k-yqWZgbPD0tJzx8DzaCIkgmX5cybhByQWLg,12619
9
9
  upgini/metrics.py,sha256=KCPE_apPN-9BIdv6GqASbJVaB_gBcy8wzNApAcyaGo4,46020
@@ -52,7 +52,7 @@ upgini/utils/config.py,sha256=zFdnjchykfp_1Tm3Qep7phLzXBpXIOzr2tIuXchRBLw,1754
52
52
  upgini/utils/country_utils.py,sha256=lY-eXWwFVegdVENFttbvLcgGDjFO17Sex8hd2PyJaRk,6937
53
53
  upgini/utils/custom_loss_utils.py,sha256=kieNZYBYZm5ZGBltF1F_jOSF4ea6C29rYuCyiDcqVNY,3857
54
54
  upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
55
- upgini/utils/datetime_utils.py,sha256=l85UzSQLhtMeI2G6m-m8y8bCColCLSXNHb2-G6fKpLM,16988
55
+ upgini/utils/datetime_utils.py,sha256=aHeX0iJwPNCj12z_3uDUP_pjVR7bWGeUc-dtxrtM3Q8,17002
56
56
  upgini/utils/deduplicate_utils.py,sha256=CLX0QapRxB-ZVQT7yGvv1vSd2zac5SwRjCJavujdCps,11332
57
57
  upgini/utils/display_utils.py,sha256=MoTqXZJvC6pAqgOaI3V0FG-IU_LnMfrn4TDcNvUqsdg,13316
58
58
  upgini/utils/email_utils.py,sha256=pZ2vCfNxLIPUhxr0-OlABNXm12jjU44isBk8kGmqQzA,5277
@@ -74,7 +74,7 @@ upgini/utils/target_utils.py,sha256=CihpV6SC95HwtlMH60rGAUzVDa4Id0Bva8ySprmNHlE,
74
74
  upgini/utils/track_info.py,sha256=NDKeQTUlZaYp15UoP-xLKGoDoJQ0drbDMwB0g9R0PUg,6427
75
75
  upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
76
76
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
77
- upgini-1.2.133a1.dist-info/METADATA,sha256=oveLN_pPi2K1BqqAnu5ZnGXVMl7TeD65Jg1biA1drE0,51135
78
- upgini-1.2.133a1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
79
- upgini-1.2.133a1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
80
- upgini-1.2.133a1.dist-info/RECORD,,
77
+ upgini-1.2.134.dist-info/METADATA,sha256=obO7_PRcJM6erbGiU4G8fXumU7Ekozv2xcvBqcP3lCk,51133
78
+ upgini-1.2.134.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
79
+ upgini-1.2.134.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
80
+ upgini-1.2.134.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.25.0
2
+ Generator: hatchling 1.27.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any