upgini 1.2.124__py3-none-any.whl → 1.2.127__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- upgini/__about__.py +1 -1
- upgini/autofe/binary.py +4 -3
- upgini/data_source/data_source_publisher.py +1 -9
- upgini/dataset.py +3 -1
- upgini/features_enricher.py +129 -76
- upgini/metadata.py +2 -0
- upgini/normalizer/normalize_utils.py +2 -2
- upgini/resource_bundle/strings.properties +2 -1
- upgini/search_task.py +12 -1
- upgini/utils/datetime_utils.py +103 -36
- upgini/utils/deduplicate_utils.py +2 -2
- upgini/utils/display_utils.py +44 -7
- upgini/utils/feature_info.py +18 -7
- {upgini-1.2.124.dist-info → upgini-1.2.127.dist-info}/METADATA +2 -1
- {upgini-1.2.124.dist-info → upgini-1.2.127.dist-info}/RECORD +17 -17
- {upgini-1.2.124.dist-info → upgini-1.2.127.dist-info}/WHEEL +0 -0
- {upgini-1.2.124.dist-info → upgini-1.2.127.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py CHANGED
@@ -1 +1 @@
-__version__ = "1.2.124"
+__version__ = "1.2.127"
upgini/autofe/binary.py CHANGED
@@ -1,5 +1,6 @@
 import abc
 from typing import Optional
+
 import Levenshtein
 import numpy as np
 import pandas as pd
@@ -201,7 +202,7 @@ class JaroWinklerSim1(StringSim):
     has_symmetry_importance: bool = True

     def _prepare_value(self, value: Optional[str]) -> Optional[str]:
-        return value
+        return value if value is not None and len(value) > 0 else None

     def _similarity(self, left: str, right: str) -> float:
         return jarowinkler_similarity(left, right)
@@ -216,7 +217,7 @@ class JaroWinklerSim2(StringSim):
     has_symmetry_importance: bool = True

     def _prepare_value(self, value: Optional[str]) -> Optional[str]:
-        return value[::-1] if value is not None else None
+        return value[::-1] if value is not None and len(value) > 0 else None

     def _similarity(self, left: str, right: str) -> float:
         return jarowinkler_similarity(left, right)
@@ -231,7 +232,7 @@ class LevenshteinSim(StringSim):
     has_symmetry_importance: bool = True

     def _prepare_value(self, value: Optional[str]) -> Optional[str]:
-        return value
+        return value if value is not None and len(value) > 0 else None

     def _similarity(self, left: str, right: str) -> float:
         return 1 - Levenshtein.distance(left, right) / max(len(left), len(right))
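Why the new `_prepare_value` guard matters, especially for `LevenshteinSim`: when both inputs are empty strings, `max(len(left), len(right))` is 0 and the similarity formula divides by zero. A minimal standalone sketch of the guarded pattern (requires the `Levenshtein` package; names outside the diff are illustrative):

from typing import Optional

import Levenshtein


def prepare_value(value: Optional[str]) -> Optional[str]:
    # Treat None and "" the same way: skip the pair instead of comparing.
    return value if value is not None and len(value) > 0 else None


def levenshtein_sim(left: Optional[str], right: Optional[str]) -> Optional[float]:
    left, right = prepare_value(left), prepare_value(right)
    if left is None or right is None:
        return None  # similarity is undefined for missing/empty values
    # Without the guard above, left == right == "" would divide by zero here.
    return 1 - Levenshtein.distance(left, right) / max(len(left), len(right))


print(levenshtein_sim("kitten", "sitting"))  # ~0.571
print(levenshtein_sim("", "abc"))            # None instead of an error path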
upgini/data_source/data_source_publisher.py CHANGED
@@ -123,17 +123,9 @@ class DataSourcePublisher:
             set(search_keys.values()) == {SearchKey.IP_RANGE_FROM, SearchKey.IP_RANGE_TO}
             or set(search_keys.values()) == {SearchKey.IPV6_RANGE_FROM, SearchKey.IPV6_RANGE_TO}
             or set(search_keys.values()) == {SearchKey.MSISDN_RANGE_FROM, SearchKey.MSISDN_RANGE_TO}
+            or snapshot_frequency_days is not None or join_date_abs_limit_days is not None
         ) and sort_column is None:
             raise ValidationError("Sort column is required for passed search keys")
-        if (
-            set(search_keys.values()) == {SearchKey.PHONE, SearchKey.DATE}
-            and snapshot_frequency_days is None
-            and join_date_abs_limit_days is None
-        ):
-            raise ValidationError(
-                "With MSISDN and DATE keys one of the snapshot_frequency_days or"
-                " join_date_abs_limit_days parameters is required"
-            )
         if (
             set(search_keys.values()) == {SearchKey.PHONE, SearchKey.DATE}
             or set(search_keys.values()) == {SearchKey.HEM, SearchKey.DATE}
upgini/dataset.py CHANGED
@@ -151,7 +151,9 @@ class Dataset:
     def etalon_def_checked(self) -> Dict[str, str]:
         if self.etalon_def is None:
             self.etalon_def = {
-                v.value: k
+                v.value: k
+                for k, v in self.meaning_types_checked.items()
+                if v not in [FileColumnMeaningType.FEATURE, FileColumnMeaningType.DATE_FEATURE]
             }

         return self.etalon_def
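`etalon_def_checked` builds a reverse map from meaning-type value to column name, and with this release it skips both plain and date features, so only search keys and system columns land in the etalon definition. A standalone sketch of the same shape (the enum here is an illustrative stand-in for `FileColumnMeaningType`):

from enum import Enum


class MeaningType(Enum):  # stand-in for upgini.metadata.FileColumnMeaningType
    DATE = "DATE"
    TARGET = "TARGET"
    FEATURE = "FEATURE"
    DATE_FEATURE = "DATE_FEATURE"


meaning_types = {
    "order_date": MeaningType.DATE,
    "label": MeaningType.TARGET,
    "price": MeaningType.FEATURE,
    "signup_date": MeaningType.DATE_FEATURE,
}

# Reverse map, excluding feature columns: meaning-type value -> column name
etalon_def = {
    v.value: k
    for k, v in meaning_types.items()
    if v not in [MeaningType.FEATURE, MeaningType.DATE_FEATURE]
}
print(etalon_def)  # {'DATE': 'order_date', 'TARGET': 'label'}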
upgini/features_enricher.py CHANGED
@@ -76,7 +76,7 @@ from upgini.utils.custom_loss_utils import (
 )
 from upgini.utils.cv_utils import CVConfig, get_groups
 from upgini.utils.datetime_utils import (
-    DateTimeSearchKeyConverter,
+    DateTimeConverter,
     is_blocked_time_series,
     is_dates_distribution_valid,
     is_time_series,
@@ -220,7 +220,9 @@ class FeaturesEnricher(TransformerMixin):
         cv: CVType | None = None,
         loss: str | None = None,
         autodetect_search_keys: bool = True,
+        # deprecated, use text_features instead
         generate_features: list[str] | None = None,
+        text_features: list[str] | None = None,
         columns_for_online_api: list[str] | None = None,
         round_embeddings: int | None = None,
         logs_enabled: bool = True,
@@ -305,10 +307,8 @@ class FeaturesEnricher(TransformerMixin):
             search_task = SearchTask(search_id, rest_client=self.rest_client, logger=self.logger)

             print(self.bundle.get("search_by_task_id_start"))
-            trace_id =
-            print(f"https://app.datadoghq.eu/logs?query=%40trace_id%3A{trace_id}")
-            with MDC(trace_id=trace_id):
+            trace_id = time.time_ns()
+            with MDC(correlation_id=trace_id):
                 try:
                     self.logger.debug(f"FeaturesEnricher created from existing search: {search_id}")
                     self._search_task = search_task.poll_result(trace_id, quiet=True, check_fit=True)
@@ -342,14 +342,14 @@ class FeaturesEnricher(TransformerMixin):
         self.shared_datasets = shared_datasets
         if shared_datasets is not None:
             self.runtime_parameters.properties["shared_datasets"] = ",".join(shared_datasets)
-        self.generate_features = generate_features
+        self.generate_features = text_features or generate_features
         self.round_embeddings = round_embeddings
-        if generate_features is not None:
-            if len(generate_features) > self.GENERATE_FEATURES_LIMIT:
+        if self.generate_features is not None:
+            if len(self.generate_features) > self.GENERATE_FEATURES_LIMIT:
                 msg = self.bundle.get("too_many_generate_features").format(self.GENERATE_FEATURES_LIMIT)
                 self.logger.error(msg)
                 raise ValidationError(msg)
-            self.runtime_parameters.properties["generate_features"] = ",".join(generate_features)
+            self.runtime_parameters.properties["generate_features"] = ",".join(self.generate_features)
         if round_embeddings is not None:
             if not isinstance(round_embeddings, int) or round_embeddings < 0:
                 msg = self.bundle.get("invalid_round_embeddings")
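The constructor now treats `generate_features` as a deprecated alias for `text_features`, with the new name winning when both are passed. A minimal sketch of that aliasing pattern (illustrative and outside the upgini codebase; the explicit DeprecationWarning is my addition, since the diff itself only marks the old parameter with a comment):

import warnings


class Enricher:
    def __init__(self, generate_features=None, text_features=None):
        if generate_features is not None:
            warnings.warn(
                "generate_features is deprecated, use text_features instead",
                DeprecationWarning,
                stacklevel=2,
            )
        # The new name takes precedence; the old one keeps working.
        self.generate_features = text_features or generate_features


e = Enricher(generate_features=["description"])
print(e.generate_features)  # ['description']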
@@ -484,9 +484,9 @@ class FeaturesEnricher(TransformerMixin):
         stability_agg_func: str, optional (default="max")
             Function to aggregate stability values. Can be "max", "min", "mean".
         """
-        trace_id =
+        trace_id = time.time_ns()
         if self.print_trace_id:
-            print(f"https://app.datadoghq.eu/logs?query=%
+            print(f"https://app.datadoghq.eu/logs?query=%40correlation_id%3A{trace_id}")
         start_time = time.time()
         auto_fe_parameters = AutoFEParameters() if auto_fe_parameters is None else auto_fe_parameters
         search_progress = SearchProgress(0.0, ProgressStage.START_FIT)
@@ -498,7 +498,7 @@ class FeaturesEnricher(TransformerMixin):
             progress_bar.progress = search_progress.to_progress_bar()
             progress_bar.display()

-        with MDC(
+        with MDC(correlation_id=trace_id):
             if len(args) > 0:
                 msg = f"WARNING: Unsupported positional arguments for fit: {args}"
                 self.logger.warning(msg)
@@ -643,11 +643,11 @@ class FeaturesEnricher(TransformerMixin):

         self.warning_counter.reset()
         auto_fe_parameters = AutoFEParameters() if auto_fe_parameters is None else auto_fe_parameters
-        trace_id =
+        trace_id = time.time_ns()
         if self.print_trace_id:
-            print(f"https://app.datadoghq.eu/logs?query=%
+            print(f"https://app.datadoghq.eu/logs?query=%40correlation_id%3A{trace_id}")
         start_time = time.time()
-        with MDC(
+        with MDC(correlation_id=trace_id):
             if len(args) > 0:
                 msg = f"WARNING: Unsupported positional arguments for fit_transform: {args}"
                 self.logger.warning(msg)
@@ -745,8 +745,8 @@ class FeaturesEnricher(TransformerMixin):
     def transform(
         self,
         X: pd.DataFrame,
-        *args,
         y: pd.Series | None = None,
+        *args,
         exclude_features_sources: list[str] | None = None,
         keep_input: bool = True,
         trace_id: str | None = None,
@@ -787,9 +787,11 @@ class FeaturesEnricher(TransformerMixin):
             progress_bar.progress = search_progress.to_progress_bar()
             if new_progress:
                 progress_bar.display()
-        trace_id = trace_id or
+        trace_id = trace_id or time.time_ns()
+        if self.print_trace_id:
+            print(f"https://app.datadoghq.eu/logs?query=%40correlation_id%3A{trace_id}")
         search_id = self.search_id or (self._search_task.search_task_id if self._search_task is not None else None)
-        with MDC(
+        with MDC(correlation_id=trace_id, search_id=search_id):
             self.dump_input(trace_id, X)
             if len(args) > 0:
                 msg = f"WARNING: Unsupported positional arguments for transform: {args}"
@@ -904,10 +906,10 @@ class FeaturesEnricher(TransformerMixin):
             Dataframe with metrics calculated on train and validation datasets.
         """

-        trace_id = trace_id or
+        trace_id = trace_id or time.time_ns()
         start_time = time.time()
         search_id = self.search_id or (self._search_task.search_task_id if self._search_task is not None else None)
-        with MDC(
+        with MDC(correlation_id=trace_id, search_id=search_id):
             self.logger.info("Start calculate metrics")
             if len(args) > 0:
                 msg = f"WARNING: Unsupported positional arguments for calculate_metrics: {args}"
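Across fit, fit_transform, transform, and calculate_metrics the release replaces the old trace id with `time.time_ns()` and binds it to the logging context as `correlation_id` through upgini's `MDC` context manager. A minimal sketch of the pattern, assuming `MDC` is importable from `upgini.mdc` (as the package layout suggests) and accepts arbitrary keyword pairs (which the `MDC(correlation_id=..., search_id=...)` calls in the diff imply):

import time

from upgini.mdc import MDC  # maps keyword pairs into the logging context

trace_id = time.time_ns()  # cheap and unique enough per call
with MDC(correlation_id=trace_id):
    # Every log record emitted here carries correlation_id, so one whole
    # fit/transform run can be filtered with a single Datadog query:
    #   https://app.datadoghq.eu/logs?query=%40correlation_id%3A<trace_id>
    ...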
@@ -1415,13 +1417,11 @@ class FeaturesEnricher(TransformerMixin):
         # Find latest eval set or earliest if all eval sets are before train set
         date_column = self._get_date_column(search_keys)

-        date_converter =
+        date_converter = DateTimeConverter(
             date_column, self.date_format, self.logger, self.bundle, generate_cyclical_features=False
         )

-        x_date = X[date_column].dropna()
+        x_date = date_converter.to_date_ms(X).dropna()
         if len(x_date) == 0:
             self.logger.warning("Empty date column in X")
             return []
@@ -1434,8 +1434,7 @@ class FeaturesEnricher(TransformerMixin):
             if date_column not in eval_x.columns:
                 self.logger.warning(f"Date column not found in eval_set {i + 1}")
                 continue
-            eval_x_date = eval_x[date_column].dropna()
+            eval_x_date = date_converter.to_date_ms(eval_x).dropna()
             if len(eval_x_date) < 1000:
                 self.logger.warning(f"Eval_set {i} has less than 1000 rows. It will be ignored for stability check")
                 continue
@@ -1472,8 +1471,7 @@ class FeaturesEnricher(TransformerMixin):
         )
         checking_eval_set_df = checking_eval_set_df.copy()

-        checking_eval_set_df[date_column] = eval_set_dates[selected_eval_set_idx]
-        checking_eval_set_df = date_converter.convert(checking_eval_set_df)
+        checking_eval_set_df[date_column] = date_converter.to_date_ms(eval_set_dates[selected_eval_set_idx].to_frame())

         psi_values_sparse = calculate_sparsity_psi(
             checking_eval_set_df, cat_features, date_column, self.logger, model_task_type
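The stability check now pushes every date column through `DateTimeConverter.to_date_ms`, which (per the datetime_utils diff further down) parses the column and returns day-floored epoch milliseconds, so train and eval dates compare on one scale regardless of the input dtype. A usage sketch under those assumptions, with defaults for the optional logger/bundle arguments:

import pandas as pd

from upgini.utils.datetime_utils import DateTimeConverter

X = pd.DataFrame({"date": ["2024-01-15", "2024-02-01", None]})

# generate_cyclical_features=False: only comparable timestamps are needed here
converter = DateTimeConverter("date", generate_cyclical_features=False)
x_date = converter.to_date_ms(X).dropna()
print(x_date.tolist())  # day-resolution epoch ms: [1705276800000, 1706745600000]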
@@ -1745,9 +1743,11 @@ class FeaturesEnricher(TransformerMixin):
             not in (
                 excluding_search_keys
                 + list(self.fit_dropped_features)
-                + [
+                + [DateTimeConverter.DATETIME_COL, SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID]
             )
         ]
+        if self.baseline_score_column is not None and self.baseline_score_column not in client_features:
+            client_features.append(self.baseline_score_column)
         self.logger.info(f"Client features column on prepare data for metrics: {client_features}")

         selected_enriched_features = [c for c in self.feature_names_ if c not in client_features]
@@ -1995,7 +1995,7 @@ class FeaturesEnricher(TransformerMixin):
         date_column = self._get_date_column(search_keys)
         generated_features = []
         if date_column is not None:
-            converter =
+            converter = DateTimeConverter(
                 date_column,
                 self.date_format,
                 self.logger,
@@ -2004,6 +2004,7 @@ class FeaturesEnricher(TransformerMixin):
             )
             # Leave original date column values
             df_with_date_features = converter.convert(df, keep_time=True)
+            # TODO check if this is correct
             df_with_date_features[date_column] = df[date_column]
             df = df_with_date_features
             generated_features = converter.generated_features
@@ -2035,8 +2036,8 @@ class FeaturesEnricher(TransformerMixin):
         # Sample after sorting by system_record_id for idempotency
         df.sort_values(by=SYSTEM_RECORD_ID, inplace=True)

-        if
-            df = df.drop(columns=
+        if DateTimeConverter.DATETIME_COL in df.columns:
+            df = df.drop(columns=DateTimeConverter.DATETIME_COL)

         df = df.rename(columns=columns_renaming)
         generated_features = [columns_renaming.get(c, c) for c in generated_features]
@@ -2388,7 +2389,7 @@ class FeaturesEnricher(TransformerMixin):
     def get_progress(self, trace_id: str | None = None, search_task: SearchTask | None = None) -> SearchProgress:
         search_task = search_task or self._search_task
         if search_task is not None:
-            trace_id = trace_id or
+            trace_id = trace_id or time.time_ns()
             return search_task.get_progress(trace_id)

     def display_transactional_transform_api(self, only_online_sources=False):
@@ -2416,7 +2417,7 @@ class FeaturesEnricher(TransformerMixin):
                 return "12345678"
             return "test_value"

-        file_metadata = self._search_task.get_file_metadata(
+        file_metadata = self._search_task.get_file_metadata(time.time_ns())

         def get_column_meta(column_name: str) -> FileColumnMetadata:
             for c in file_metadata.columns:
@@ -2510,7 +2511,7 @@ if response.status_code == 200:

         start_time = time.time()
         search_id = self.search_id or (self._search_task.search_task_id if self._search_task is not None else None)
-        with MDC(
+        with MDC(correlation_id=trace_id, search_id=search_id):
             self.logger.info("Start transform")

             validated_X, validated_y, validated_eval_set = self._validate_train_eval(
@@ -2552,10 +2553,15 @@ if response.status_code == 200:
         if transform_usage.has_limit:
             if len(X) > transform_usage.rest_rows:
                 rest_rows = max(transform_usage.rest_rows, 0)
+                bundle_msg = (
+                    "transform_usage_warning_registered"
+                    if self.__is_registered
+                    else "transform_usage_warning_demo"
+                )
+                msg = self.bundle.get(bundle_msg).format(len(X), rest_rows)
                 self.logger.warning(msg)
                 print(msg)
-                show_request_quote_button()
+                show_request_quote_button(is_registered=self.__is_registered)
                 return None, {}, [], {}
             else:
                 msg = self.bundle.get("transform_usage_info").format(
@@ -2599,7 +2605,7 @@ if response.status_code == 200:
         generated_features = []
         date_column = self._get_date_column(search_keys)
         if date_column is not None:
-            converter =
+            converter = DateTimeConverter(
                 date_column,
                 self.date_format,
                 self.logger,
@@ -2656,8 +2662,8 @@ if response.status_code == 200:

         # Don't pass all features in backend on transform
         runtime_parameters = self._get_copy_of_runtime_parameters()
-        features_for_transform = self._search_task.get_features_for_transform()
-        if
+        features_for_transform = self._search_task.get_features_for_transform()
+        if features_for_transform:
             missing_features_for_transform = [
                 columns_renaming.get(f) or f for f in features_for_transform if f not in df.columns
             ]
@@ -2668,7 +2674,10 @@ if response.status_code == 200:
                 raise ValidationError(
                     self.bundle.get("missing_features_for_transform").format(missing_features_for_transform)
                 )
+        features_for_embeddings = self._search_task.get_features_for_embeddings()
+        if features_for_embeddings:
+            runtime_parameters.properties["features_for_embeddings"] = ",".join(features_for_embeddings)
+        features_for_transform = [f for f in features_for_transform if f not in search_keys.keys()]

         columns_for_system_record_id = sorted(list(search_keys.keys()) + features_for_transform)

@@ -2729,8 +2738,22 @@ if response.status_code == 200:
             )
             df = converter.convert(df)

+        date_features = []
+        for col in features_for_transform:
+            if DateTimeConverter(col).is_datetime(df):
+                df[col] = DateTimeConverter(col).to_date_string(df)
+                date_features.append(col)
+
         meaning_types = {}
-        meaning_types.update(
+        meaning_types.update(
+            {
+                col: FileColumnMeaningType.FEATURE
+                for col in features_for_transform
+                if col not in date_features and col not in generated_features
+            }
+        )
+        meaning_types.update({col: FileColumnMeaningType.GENERATED_FEATURE for col in generated_features})
+        meaning_types.update({col: FileColumnMeaningType.DATE_FEATURE for col in date_features})
         meaning_types.update({col: key.value for col, key in search_keys.items()})

         features_not_to_pass.extend(
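Date-typed feature columns get special handling on transform: each candidate is probed with `is_datetime` and, if it parses, rewritten to a canonical YYYY-MM-DD string and tagged DATE_FEATURE instead of FEATURE. A usage sketch of that probe-and-normalize step, assuming the `DateTimeConverter` behavior shown in the datetime_utils diff below:

import pandas as pd

from upgini.utils.datetime_utils import DateTimeConverter

df = pd.DataFrame({
    "signup": ["2023-05-01 10:30:00", "2023-06-12 08:00:00"],
    "price": [9.99, 19.99],
})

date_features = []
for col in ["signup", "price"]:
    if DateTimeConverter(col).is_datetime(df):
        # Normalize to a plain date string before upload
        df[col] = DateTimeConverter(col).to_date_string(df)
        date_features.append(col)

print(date_features)          # ['signup'] ('price' is numeric but outside any epoch range)
print(df["signup"].tolist())  # ['2023-05-01', '2023-06-12']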
@@ -2743,8 +2766,8 @@ if response.status_code == 200:
             ]
         )

-        if
-            df = df.drop(columns=
+        if DateTimeConverter.DATETIME_COL in df.columns:
+            df = df.drop(columns=DateTimeConverter.DATETIME_COL)

         # search keys might be changed after explode
         columns_for_system_record_id = sorted(list(search_keys.keys()) + features_for_transform)
@@ -2926,6 +2949,7 @@ if response.status_code == 200:
             or c in self.search_keys
             or c in (self.id_columns or [])
             or c in [EVAL_SET_INDEX, TARGET]  # transform for metrics calculation
+            or c == self.baseline_score_column
         ]
     else:
         selected_input_columns = []
@@ -3124,7 +3148,7 @@ if response.status_code == 200:
         self.fit_generated_features = []

         if has_date:
-            converter =
+            converter = DateTimeConverter(
                 maybe_date_column,
                 self.date_format,
                 self.logger,
@@ -3177,8 +3201,8 @@ if response.status_code == 200:
             self.TARGET_NAME,
             EVAL_SET_INDEX,
         ] + list(self.fit_search_keys.keys())
-        if
-            non_feature_columns.append(
+        if DateTimeConverter.DATETIME_COL in df.columns:
+            non_feature_columns.append(DateTimeConverter.DATETIME_COL)

         features_columns = [c for c in df.columns if c not in non_feature_columns]
@@ -3265,15 +3289,28 @@ if response.status_code == 200:
             ENTITY_SYSTEM_RECORD_ID,
             SEARCH_KEY_UNNEST,
         ] + list(self.fit_search_keys.keys())
-        if
-            non_feature_columns.append(
+        if DateTimeConverter.DATETIME_COL in df.columns:
+            non_feature_columns.append(DateTimeConverter.DATETIME_COL)

         features_columns = [c for c in df.columns if c not in non_feature_columns]

+        # find date features
+        date_features = []
+        for col in features_columns:
+            if DateTimeConverter(col).is_datetime(df):
+                df[col] = DateTimeConverter(col).to_date_string(df)
+                date_features.append(col)
+
         meaning_types = {
             **{col: key.value for col, key in self.fit_search_keys.items()},
-            **{
+            **{
+                str(c): FileColumnMeaningType.FEATURE
+                for c in df.columns
+                if c not in non_feature_columns and c not in date_features and c not in self.fit_generated_features
+            },
         }
+        meaning_types.update({col: FileColumnMeaningType.GENERATED_FEATURE for col in self.fit_generated_features})
+        meaning_types.update({col: FileColumnMeaningType.DATE_FEATURE for col in date_features})
         meaning_types[self.TARGET_NAME] = FileColumnMeaningType.TARGET
         meaning_types[ENTITY_SYSTEM_RECORD_ID] = FileColumnMeaningType.ENTITY_SYSTEM_RECORD_ID
         if SEARCH_KEY_UNNEST in df.columns:
@@ -3294,8 +3331,8 @@ if response.status_code == 200:
             self.bundle,
         )

-        if
-            df = df.drop(columns=
+        if DateTimeConverter.DATETIME_COL in df.columns:
+            df = df.drop(columns=DateTimeConverter.DATETIME_COL)

         meaning_types[SYSTEM_RECORD_ID] = FileColumnMeaningType.SYSTEM_RECORD_ID

@@ -3332,7 +3369,14 @@ if response.status_code == 200:
         dataset.columns_renaming = self.fit_columns_renaming

         self.passed_features = [
-            column
+            column
+            for column, meaning_type in meaning_types.items()
+            if meaning_type
+            in [
+                FileColumnMeaningType.FEATURE,
+                FileColumnMeaningType.DATE_FEATURE,
+                FileColumnMeaningType.GENERATED_FEATURE,
+            ]
         ]

         self._search_task = dataset.search(
@@ -3860,8 +3904,8 @@ if response.status_code == 200:
         X = Xy.drop(columns=TARGET)
         y = Xy[TARGET].copy()

-        if
-            X.drop(columns=
+        if DateTimeConverter.DATETIME_COL in X.columns:
+            X.drop(columns=DateTimeConverter.DATETIME_COL, inplace=True)

         return X, y

@@ -3871,8 +3915,8 @@ if response.status_code == 200:
         X: pd.DataFrame, y: pd.Series, search_keys: dict[str, SearchKey], cv: CVType | None
     ) -> tuple[pd.DataFrame, pd.Series]:
         if cv not in [CVType.time_series, CVType.blocked_time_series]:
-            if
-                date_column =
+            if DateTimeConverter.DATETIME_COL in X.columns:
+                date_column = DateTimeConverter.DATETIME_COL
             else:
                 date_column = FeaturesEnricher._get_date_column(search_keys)
             sort_columns = [date_column] if date_column is not None else []
@@ -3900,8 +3944,8 @@ if response.status_code == 200:

         y = Xy[TARGET].copy()

-        if
-            X.drop(columns=
+        if DateTimeConverter.DATETIME_COL in X.columns:
+            X.drop(columns=DateTimeConverter.DATETIME_COL, inplace=True)

         return X, y

@@ -3980,12 +4024,10 @@ if response.status_code == 200:
         maybe_date_col = SearchKey.find_key(self.search_keys, [SearchKey.DATE, SearchKey.DATETIME])
         if X is not None and maybe_date_col is not None and maybe_date_col in X.columns:
             # TODO cast date column to single dtype
-            date_converter =
-            )
-            min_date = converted_X[maybe_date_col].min()
-            max_date = converted_X[maybe_date_col].max()
+            date_converter = DateTimeConverter(maybe_date_col, self.date_format, generate_cyclical_features=False)
+            date_col_values = date_converter.to_date_ms(X)
+            min_date = date_col_values.min()
+            max_date = date_col_values.max()
             self.logger.info(f"Dates interval is ({min_date}, {max_date})")

     except Exception:
@@ -4022,7 +4064,7 @@ if response.status_code == 200:
         self.__log_warning(bundle.get("current_date_added"))
         df[FeaturesEnricher.CURRENT_DATE] = datetime.date.today()
         search_keys[FeaturesEnricher.CURRENT_DATE] = SearchKey.DATE
-        converter =
+        converter = DateTimeConverter(FeaturesEnricher.CURRENT_DATE, generate_cyclical_features=False)
         df = converter.convert(df)
         return df

@@ -4153,8 +4195,8 @@ if response.status_code == 200:
             "__target",
             ENTITY_SYSTEM_RECORD_ID,
         ]
-        if
-            date_column =
+        if DateTimeConverter.DATETIME_COL in df.columns:
+            date_column = DateTimeConverter.DATETIME_COL
             sort_exclude_columns.append(FeaturesEnricher._get_date_column(search_keys))
         else:
             date_column = FeaturesEnricher._get_date_column(search_keys)
@@ -4399,7 +4441,9 @@ if response.status_code == 200:
             raise Exception(self.bundle.get("missing_features_meta"))
         features_meta = deepcopy(features_meta)

+        file_metadata_columns = self._search_task.get_file_metadata(trace_id).columns
+        file_meta_by_orig_name = {c.originalName: c for c in file_metadata_columns}
+        original_names_dict = {c.name: c.originalName for c in file_metadata_columns}
         features_df = self._search_task.get_all_initial_raw_features(trace_id, metrics_calculation=True)

         # To be sure that names with hash suffixes
@@ -4419,7 +4463,11 @@ if response.status_code == 200:
             original_name = original_names_dict.get(feature_meta.name, feature_meta.name)
             feature_meta.name = original_name

+            file_meta = file_meta_by_orig_name.get(original_name)
+            is_generated_feature = (
+                file_meta is not None and file_meta.meaningType == FileColumnMeaningType.GENERATED_FEATURE
+            )
+            is_client_feature = original_name in clients_features_df.columns and not is_generated_feature

             if selected_features is not None and feature_meta.name not in selected_features:
                 self.logger.info(f"Feature {feature_meta.name} is not selected before and skipped")
@@ -4442,9 +4490,13 @@ if response.status_code == 200:

         for feature_meta in selected_features_meta:
             original_name = original_names_dict.get(feature_meta.name, feature_meta.name)
+            file_meta = file_meta_by_orig_name.get(original_name)
+            is_generated_feature = (
+                file_meta is not None and file_meta.meaningType == FileColumnMeaningType.GENERATED_FEATURE
+            )
+            is_client_feature = original_name in clients_features_df.columns and not is_generated_feature

-            if not is_client_feature:
+            if not is_client_feature and not is_generated_feature:
                 self.external_source_feature_names.append(original_name)

             if self.psi_values is not None:
@@ -4475,9 +4527,10 @@ if response.status_code == 200:

             self.feature_names_.append(feature_meta.name)
             self.feature_importances_.append(_round_shap_value(feature_meta.shap_value))
             df_for_sample = features_df if feature_meta.name in features_df.columns else clients_features_df
-            feature_info = FeatureInfo.from_metadata(
+            feature_info = FeatureInfo.from_metadata(
+                feature_meta, df_for_sample, is_client_feature, is_generated_feature
+            )
             features_info.append(feature_info.to_row(self.bundle))
             features_info_without_links.append(feature_info.to_row_without_links(self.bundle))
             internal_features_info.append(feature_info.to_internal_row(self.bundle))
@@ -4488,7 +4541,7 @@ if response.status_code == 200:
         if len(features_info) > 0:
             self.features_info = pd.DataFrame(features_info)
             # If all psi values are 0 or null, drop psi column
-            if self.features_info[self.bundle.get("features_info_psi")].fillna(0.0).eq(0.0).all():
+            if self.features_info[self.bundle.get("features_info_psi")].astype(np.float64).fillna(0.0).eq(0.0).all():
                 self.features_info.drop(columns=[self.bundle.get("features_info_psi")], inplace=True)
             self._features_info_without_links = pd.DataFrame(features_info_without_links)
             self._internal_features_info = pd.DataFrame(internal_features_info)
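The added `.astype(np.float64)` makes the all-zero check robust when the PSI column does not arrive as clean floats. One plausible failure mode (illustrative, not confirmed from the diff): values rendered as strings, where `.eq(0.0)` compares by type and misses string zeros, so an effectively empty column would be kept. A small reproduction of the difference:

import numpy as np
import pandas as pd

col = pd.Series(["0.0", None, "0.0"], dtype="object")  # psi rendered as strings

print(col.fillna(0.0).eq(0.0).all())                    # False: "0.0" != 0.0
print(col.astype(np.float64).fillna(0.0).eq(0.0).all()) # True: cast first, then compare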
@@ -4954,7 +5007,7 @@ if response.status_code == 200:
         eval_set: tuple | None = None,
     ):
         def dump_task(X_, y_, eval_set_):
-            with MDC(
+            with MDC(correlation_id=trace_id):
                 try:
                     if isinstance(X_, pd.Series):
                         X_ = X_.to_frame()
upgini/normalizer/normalize_utils.py CHANGED
@@ -25,7 +25,7 @@ from upgini.metadata import (
 from upgini.resource_bundle import ResourceBundle, get_custom_bundle
 from upgini.utils import find_numbers_with_decimal_comma
 from upgini.utils.country_utils import CountrySearchKeyConverter
-from upgini.utils.datetime_utils import DateTimeSearchKeyConverter
+from upgini.utils.datetime_utils import DateTimeConverter
 from upgini.utils.ip_utils import IpSearchKeyConverter
 from upgini.utils.phone_utils import PhoneSearchKeyConverter
 from upgini.utils.postal_code_utils import PostalCodeSearchKeyConverter
@@ -89,7 +89,7 @@ class Normalizer:
                 SYSTEM_RECORD_ID,
                 ENTITY_SYSTEM_RECORD_ID,
                 SEARCH_KEY_UNNEST,
-                DateTimeSearchKeyConverter.DATETIME_COL,
+                DateTimeConverter.DATETIME_COL,
             ]:
                 self.columns_renaming[column] = column
                 new_columns.append(column)
upgini/resource_bundle/strings.properties CHANGED
@@ -12,7 +12,8 @@ polling_unregister_information=We'll send email notification once it's completed
 ads_upload_finish=Thank you for your submission!\nWe'll check your data sharing proposal and get back to you
 demo_dataset_info=Demo training dataset detected. Registration for an API key is not required.\n
 transform_usage_info=You use Trial access to Upgini data enrichment. Limit for Trial: {} rows. You have already enriched: {} rows.
+transform_usage_warning_demo=Unregistered-user limit: {} rows remaining; you requested {}.
+transform_usage_warning_registered=Free tier limit: {} rows remaining; you requested {}.

 # Warnings
 support_link=https://upgini.com/support
upgini/search_task.py CHANGED
@@ -165,10 +165,21 @@ class SearchTask:

         return list(zero_hit_search_keys)

-    def
+    def get_features_for_embeddings(self) -> Optional[List[str]]:
         if self.provider_metadata_v2 is None:
             return None

+        features_for_transform = set()
+        for meta in self.provider_metadata_v2:
+            if meta.features_used_for_embeddings is not None:
+                features_for_transform.update(meta.features_used_for_embeddings)
+
+        return list(features_for_transform)
+
+    def get_features_for_transform(self) -> List[str]:
+        if self.provider_metadata_v2 is None:
+            return []
+
         features_for_transform = set()
         for meta in self.provider_metadata_v2:
             if meta.features_used_for_embeddings is not None:
upgini/utils/datetime_utils.py CHANGED
@@ -30,7 +30,7 @@ DATE_FORMATS = [
 DATETIME_PATTERN = r"^[\d\s\.\-:T/+]+$"


-class DateTimeSearchKeyConverter:
+class DateTimeConverter:
     DATETIME_COL = "_date_time"
     # MIN_SUPPORTED_DATE_TS = datetime.datetime(1999, 12, 31)  # 946684800000  # 2000-01-01
     MIN_SUPPORTED_DATE_TS = pd.to_datetime(datetime.datetime(1999, 12, 31)).tz_localize(None)
@@ -73,41 +73,99 @@ class DateTimeSearchKeyConverter:
         except Exception:
             return None

-    def
-        if len(df) == 0:
-            return
+    def is_datetime(self, df: pd.DataFrame) -> bool:
+        if len(df) == 0 or df[self.date_column].isna().all():
+            return False
+
+        if pd.api.types.is_datetime64_any_dtype(df[self.date_column]):
+            return True
+
+        parsed = self.parse_datetime(df, raise_errors=False)
+        return parsed is not None and not parsed.isna().all()

+    def parse_datetime(self, df: pd.DataFrame, raise_errors=True) -> pd.Series | None:
         df = df.copy()
-        if df[self.date_column].
+        if len(df) == 0 or df[self.date_column].isna().all():
+            return None
+
+        try:
+            if df[self.date_column].apply(lambda x: isinstance(x, datetime.datetime)).all():
+                parsed_datetime = df[self.date_column].apply(lambda x: x.replace(tzinfo=None))
+            elif isinstance(df[self.date_column].dropna().values[0], datetime.date):
+                parsed_datetime = pd.to_datetime(df[self.date_column], errors="coerce")
+            elif isinstance(df[self.date_column].dtype, pd.PeriodDtype):
+                parsed_datetime = df[self.date_column].dt.to_timestamp()
+            elif is_numeric_dtype(df[self.date_column]):
+                # 315532801 - 2524608001 - seconds
+                # 315532801000 - 2524608001000 - milliseconds
+                # 315532801000000 - 2524608001000000 - microseconds
+                # 315532801000000000 - 2524608001000000000 - nanoseconds
+                if df[self.date_column].apply(lambda x: 10**16 < x).all():
+                    parsed_datetime = pd.to_datetime(df[self.date_column], unit="ns")
+                elif df[self.date_column].apply(lambda x: 10**14 < x < 10**16).all():
+                    parsed_datetime = pd.to_datetime(df[self.date_column], unit="us")
+                elif df[self.date_column].apply(lambda x: 10**11 < x < 10**14).all():
+                    parsed_datetime = pd.to_datetime(df[self.date_column], unit="ms")
+                elif df[self.date_column].apply(lambda x: 10**8 < x < 10**11).all():
+                    parsed_datetime = pd.to_datetime(df[self.date_column], unit="s")
+                else:
+                    msg = self.bundle.get("unsupported_date_type").format(self.date_column)
+                    if raise_errors:
+                        raise ValidationError(msg)
+                    else:
+                        return None
+            else:
+                df[self.date_column] = df[self.date_column].astype("string").apply(self.clean_date)
+                parsed_datetime = self.parse_string_date(df, raise_errors)
+            parsed_datetime = parsed_datetime.dt.tz_localize(None)
+            return parsed_datetime
+        except Exception as e:
+            if raise_errors:
+                raise ValidationError(e)
             else:
-
+                return None
+
+    def to_date_string(self, df: pd.DataFrame) -> pd.Series:
+        parsed_datetime = self.parse_datetime(df)
+        if parsed_datetime is None:
+            return df[self.date_column]
+        return parsed_datetime.dt.strftime("%Y-%m-%d")
+
+    def to_date_ms(self, df: pd.DataFrame) -> pd.Series:
+        parsed_datetime = self.parse_datetime(df)
+        if parsed_datetime is None:
+            return df[self.date_column]
+        return self.convert_datetime_to_date_ms(parsed_datetime)
+
+    def convert_datetime_to_datetime_ms(self, date_col: pd.Series) -> pd.Series:
+        if date_col.dt.unit == "ns":
+            date_col = date_col.astype(np.int64) // 1_000_000
+        elif date_col.dt.unit == "us":
+            date_col = date_col.astype(np.int64) // 1_000
+        elif date_col.dt.unit == "ms":
+            date_col = date_col.astype(np.int64)
+        elif date_col.dt.unit == "s":
+            date_col = date_col.astype(np.int64) * 1_000
         else:
-
+            raise ValueError(f"Unsupported date unit: {date_col.dt.unit}")
+
+        return date_col.apply(self._int_to_opt).astype("Int64")
+
+    def convert_datetime_to_date_ms(self, date_col: pd.Series) -> pd.Series:
+        date_col = date_col.dt.floor("D")
+        return self.convert_datetime_to_datetime_ms(date_col)
+
+    def convert(self, df: pd.DataFrame, keep_time=False) -> pd.DataFrame:
+        df = df.copy()
+        parsed_datetime = self.parse_datetime(df)
+        if parsed_datetime is None:
+            return df
+
+        df[self.date_column] = parsed_datetime

         # If column with date is datetime then extract seconds of the day and minute of the hour
         # as additional features
         seconds = "datetime_seconds"
-        df[self.date_column] = df[self.date_column].dt.tz_localize(None)

         df = self.clean_old_dates(df)
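The numeric branch infers the epoch unit purely from magnitude: timestamps for dates between roughly 1980 and 2050 occupy disjoint order-of-magnitude bands depending on whether they count seconds, milliseconds, microseconds, or nanoseconds. A standalone demonstration of that inference rule (thresholds copied from the diff; the helper name is illustrative):

import pandas as pd


def infer_epoch_unit(values: pd.Series) -> str:
    # Each unit places ~1980..2050 in a distinct size band,
    # so the unit is recoverable from magnitude alone.
    if values.apply(lambda x: 10**16 < x).all():
        return "ns"
    if values.apply(lambda x: 10**14 < x < 10**16).all():
        return "us"
    if values.apply(lambda x: 10**11 < x < 10**14).all():
        return "ms"
    if values.apply(lambda x: 10**8 < x < 10**11).all():
        return "s"
    raise ValueError("values do not look like epoch timestamps")


ts = pd.Series([1705276800, 1706745600])  # second-resolution timestamps
print(infer_epoch_unit(ts))               # 's'
print(pd.to_datetime(ts, unit=infer_epoch_unit(ts)))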
@@ -182,21 +240,22 @@ class DateTimeSearchKeyConverter:
         df.drop(columns=seconds, inplace=True)

         if keep_time:
-            df[self.DATETIME_COL] = df[self.date_column]
-
-        df[self.date_column] = df[self.date_column].dt.floor("D").astype(np.int64) // 1_000_000
-        df[self.date_column] = df[self.date_column].apply(self._int_to_opt).astype("Int64")
+            df[self.DATETIME_COL] = self.convert_datetime_to_datetime_ms(df[self.date_column])
+        df[self.date_column] = self.convert_datetime_to_date_ms(df[self.date_column])

         self.logger.info(f"Date after convertion to timestamp: {df[self.date_column]}")

         return df

-    def
+    def parse_string_date(self, df: pd.DataFrame, raise_errors=True) -> pd.Series | None:
         if self.date_format is not None:
             try:
                 return pd.to_datetime(df[self.date_column], format=self.date_format)
             except ValueError as e:
-
+                if raise_errors:
+                    raise ValidationError(e)
+                else:
+                    return None
         else:
             for date_format in DATE_FORMATS:
                 try:
@@ -204,9 +263,17 @@ class DateTimeSearchKeyConverter:
                 except ValueError:
                     pass
             try:
-
+                # Suppress warning for intentional fallback to dateutil parsing
+                import warnings
+
+                with warnings.catch_warnings():
+                    warnings.filterwarnings("ignore", message="Could not infer format")
+                    return pd.to_datetime(df[self.date_column])
             except ValueError:
-
+                if raise_errors:
+                    raise ValidationError(self.bundle.get("invalid_date_format").format(self.date_column))
+                else:
+                    return None

     def clean_old_dates(self, df: pd.DataFrame) -> pd.DataFrame:
         condition = df[self.date_column] <= self.MIN_SUPPORTED_DATE_TS
upgini/utils/deduplicate_utils.py CHANGED
@@ -14,7 +14,7 @@ from upgini.metadata import (
     SearchKey,
 )
 from upgini.resource_bundle import ResourceBundle, get_custom_bundle
-from upgini.utils.datetime_utils import DateTimeSearchKeyConverter
+from upgini.utils.datetime_utils import DateTimeConverter
 from upgini.utils.target_utils import define_task


@@ -104,7 +104,7 @@ def remove_fintech_duplicates(
     sub_df = pd.merge(sub_df, nonunique_target_rows, on=personal_cols)

     # Convert date columns for further checks
-    sub_df = DateTimeSearchKeyConverter(
+    sub_df = DateTimeConverter(
         date_col, date_format=date_format, logger=logger, bundle=bundle, generate_cyclical_features=False
     ).convert(sub_df)
     grouped_by_personal_cols = sub_df.groupby(personal_cols, group_keys=False)
upgini/utils/display_utils.py CHANGED
@@ -339,17 +339,54 @@ def show_button_download_pdf(
     return display(HTML(html), display_id=display_id)


-def show_request_quote_button():
+def show_request_quote_button(is_registered: bool):
     if not ipython_available():
-        print("https://upgini.com/request-a-quote")
+        if is_registered:
+            print("https://upgini.com/request-a-quote")
+        else:
+            print("https://profile.upgini.com/login")
     else:
-        import
-        from
+        from IPython.display import HTML, display, Javascript
+        from ipywidgets import Layout, Button
+
+        if is_registered:
+            display(HTML("""
+            <style>
+            button.custom-button {
+                border: 1px solid black !important;
+                background: white !important;
+                color: black !important;
+                white-space: nowrap;
+            }
+            </style>
+            """))
+            description = "Request a quote"
+            tooltip = "Ask a quote"
+            url = "https://upgini.com/request-a-quote"
+        else:
+            display(HTML("""
+            <style>
+            button.custom-button {
+                border: 1px solid #d00 !important;
+                background: #fff !important;
+                color: #d00 !important;
+                white-space: nowrap;
+            }
+            </style>
+            """))
+            description = "Get an API KEY"
+            tooltip = "Register"
+            url = "https://profile.upgini.com/login"
+
+        button = Button(
+            description=description,
+            layout=Layout(width='auto'),
+            tooltip=tooltip
+        )
+        button.add_class("custom-button")

         def on_button_clicked(b):
-            display(Javascript('window.open("
+            display(Javascript('window.open("' + url + '");'))

         button.on_click(on_button_clicked)
upgini/utils/feature_info.py CHANGED
@@ -31,7 +31,10 @@ class FeatureInfo:

     @staticmethod
     def from_metadata(
-        feature_meta: FeaturesMetadataV2,
+        feature_meta: FeaturesMetadataV2,
+        data: Optional[pd.DataFrame],
+        is_client_feature: bool,
+        is_generated_feature: bool,
     ) -> "FeatureInfo":
         return FeatureInfo(
             name=_get_name(feature_meta),
@@ -41,8 +44,8 @@ class FeatureInfo:
             value_preview=_get_feature_sample(feature_meta, data),
             provider=_get_provider(feature_meta, is_client_feature),
             internal_provider=_get_internal_provider(feature_meta, is_client_feature),
-            source=_get_source(feature_meta, is_client_feature),
-            internal_source=_get_internal_source(feature_meta, is_client_feature),
+            source=_get_source(feature_meta, is_client_feature, is_generated_feature),
+            internal_source=_get_internal_source(feature_meta, is_client_feature, is_generated_feature),
             update_frequency=feature_meta.update_frequency,
             commercial_schema=feature_meta.commercial_schema,
             doc_link=feature_meta.doc_link,
@@ -139,22 +142,30 @@ def _get_internal_provider(feature_meta: FeaturesMetadataV2, is_client_feature:
     return "" if is_client_feature else (feature_meta.data_provider or "Upgini")


-def _get_source(feature_meta: FeaturesMetadataV2, is_client_feature: bool) -> str:
+def _get_source(feature_meta: FeaturesMetadataV2, is_client_feature: bool, is_generated_feature: bool) -> str:
+    if is_generated_feature:
+        return "AutoFE: features from Training dataset"
+
     sources = _list_or_single(feature_meta.data_sources, feature_meta.data_source)
     source_links = _list_or_single(feature_meta.data_source_links, feature_meta.data_source_link)
     if sources:
         source = _make_links(sources, source_links)
     else:
-        source = _get_internal_source(feature_meta, is_client_feature)
+        source = _get_internal_source(feature_meta, is_client_feature, is_generated_feature)
     return source


-def _get_internal_source(feature_meta: FeaturesMetadataV2, is_client_feature: bool) -> str:
+def _get_internal_source(feature_meta: FeaturesMetadataV2, is_client_feature: bool, is_generated_feature: bool) -> str:
+    if is_generated_feature:
+        return "AutoFE: features from Training dataset"
+
     sources = _list_or_single(feature_meta.data_sources, feature_meta.data_source)
     if sources:
         return ", ".join(sources)
+    elif feature_meta.data_source:
+        return feature_meta.data_source
     else:
-        return
+        return (
             LLM_SOURCE
             if not feature_meta.name.endswith("_country")
             and not feature_meta.name.endswith("_postal_code")
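`_get_internal_source` now resolves a feature's displayed source with an explicit precedence: AutoFE-generated features first, then any listed data sources, then the single `data_source` field, and only then the LLM fallback. A condensed sketch of that precedence chain (the metadata class and the `LLM_SOURCE` value are assumed stand-ins, not copied from the library):

from dataclasses import dataclass, field
from typing import List, Optional

LLM_SOURCE = "LLM with external data augmentation"  # assumed constant value


@dataclass
class Meta:  # stand-in for FeaturesMetadataV2
    name: str
    data_sources: List[str] = field(default_factory=list)
    data_source: Optional[str] = None


def internal_source(meta: Meta, is_generated_feature: bool) -> str:
    if is_generated_feature:
        return "AutoFE: features from Training dataset"
    if meta.data_sources:
        return ", ".join(meta.data_sources)
    if meta.data_source:
        return meta.data_source
    return LLM_SOURCE  # simplified: the diff also special-cases *_country etc.


print(internal_source(Meta("f_autofe_sum"), is_generated_feature=True))
print(internal_source(Meta("income", data_sources=["Census", "OSM"]), False))
print(internal_source(Meta("text_emb_0"), False))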
{upgini-1.2.124.dist-info → upgini-1.2.127.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: upgini
-Version: 1.2.124
+Version: 1.2.127
 Summary: Intelligent data search & enrichment for Machine Learning
 Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
 Project-URL: Homepage, https://upgini.com/
@@ -30,6 +30,7 @@ Requires-Dist: ipywidgets>=8.1.0
 Requires-Dist: jarowinkler>=2.0.0
 Requires-Dist: levenshtein>=0.25.1
 Requires-Dist: lightgbm>=4.6.0
+Requires-Dist: more-itertools==10.7.0
 Requires-Dist: numpy<3.0.0,>=1.19.0
 Requires-Dist: pandas<3.0.0,>=1.1.0
 Requires-Dist: psutil>=5.9.0
{upgini-1.2.124.dist-info → upgini-1.2.127.dist-info}/RECORD CHANGED
@@ -1,20 +1,20 @@
-upgini/__about__.py,sha256=
+upgini/__about__.py,sha256=h491OIJG19TxwsLIKSTYrHLAOPuj31b_J7sUaPKFa6c,24
 upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
 upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
-upgini/dataset.py,sha256=
+upgini/dataset.py,sha256=Nm2ZmwyQqvTnymYpGUwyJWy7y2ebXlHMyYmGeGcyA_s,31652
 upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
-upgini/features_enricher.py,sha256=
+upgini/features_enricher.py,sha256=wC9hWu47gdn-dXs5yLHO9etjm3t7XVF-xpafF1gakWI,234470
 upgini/http.py,sha256=-J_wOpnwVnT0ebPC6sOs6fN3AWtCD0LJLu6nlYmxaqk,44348
-upgini/metadata.py,sha256=
+upgini/metadata.py,sha256=H3wiN37k-yqWZgbPD0tJzx8DzaCIkgmX5cybhByQWLg,12619
 upgini/metrics.py,sha256=KCPE_apPN-9BIdv6GqASbJVaB_gBcy8wzNApAcyaGo4,46020
-upgini/search_task.py,sha256=
+upgini/search_task.py,sha256=5mL_qV5mVtDkIumM9xCOgfa9Lc2B8mxJ1qI21iaScnQ,18656
 upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
 upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
 upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9JvfkvgE,50
 upgini/ads_management/ads_manager.py,sha256=igVbN2jz80Umb2BUJixmJVj-zx8unoKpecVo-R-nGdw,2648
 upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 upgini/autofe/all_operators.py,sha256=rdjF5eaE4bC6Q4eu_el5Z7ekYt8DjOFermz2bePPbUc,333
-upgini/autofe/binary.py,sha256=
+upgini/autofe/binary.py,sha256=o3TQuP3EnECAVIeToGczu4yJ4vX7BJ2iSCN9Ra1SZJI,7829
 upgini/autofe/date.py,sha256=RvexgrL1_6ISYPVrl9HUQmPgpVSGQsTNv8YhNQWs-5M,11329
 upgini/autofe/feature.py,sha256=W9sZHdz5Vi0H_oPyY5saZAPjyd5wunpULnCqrGLpQc4,16879
 upgini/autofe/groupby.py,sha256=IYmQV9uoCdRcpkeWZj_kI3ObzoNCNx3ff3h8sTL01tk,3603
@@ -31,14 +31,14 @@ upgini/autofe/timeseries/roll.py,sha256=zADKXU-eYWQnQ5R3am1yEal8uU6Tm0jLAixwPb_a
 upgini/autofe/timeseries/trend.py,sha256=K1_iw2ko_LIUU8YCUgrvN3n0MkHtsi7-63-8x9er1k4,2129
 upgini/autofe/timeseries/volatility.py,sha256=SvZfhM_ZAWCNpTf87WjSnZsnlblARgruDlu4By4Zvhc,8078
 upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-upgini/data_source/data_source_publisher.py,sha256=
+upgini/data_source/data_source_publisher.py,sha256=CQi3fEukaStV-RiadSEvEFLThOlZJzA6PzleQQgGfGk,26286
 upgini/mdc/__init__.py,sha256=iHJlXQg6xRM1-ZOUtaPSJqw5SpQDszvxp4LyqviNLIQ,1027
 upgini/mdc/context.py,sha256=3u1B-jXt7tXEvNcV3qmR9SDCseudnY7KYsLclBdwVLk,1405
 upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-upgini/normalizer/normalize_utils.py,sha256=
+upgini/normalizer/normalize_utils.py,sha256=w9f_9udrwqbhXgFMTs2keuce-6X_j6h3D7EdNo_2X7g,8493
 upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
 upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
-upgini/resource_bundle/strings.properties,sha256=
+upgini/resource_bundle/strings.properties,sha256=3aK2sxXYuvSLuoOyLq8IcyekfINH0Il5nLvVXMsuEpY,29353
 upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
 upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 upgini/sampler/base.py,sha256=Fva2FEhLiNRPZ9Q6uOtJRtRzwsayjv7aphalAZO_4lc,6452
@@ -52,12 +52,12 @@ upgini/utils/config.py,sha256=zFdnjchykfp_1Tm3Qep7phLzXBpXIOzr2tIuXchRBLw,1754
 upgini/utils/country_utils.py,sha256=lY-eXWwFVegdVENFttbvLcgGDjFO17Sex8hd2PyJaRk,6937
 upgini/utils/custom_loss_utils.py,sha256=kieNZYBYZm5ZGBltF1F_jOSF4ea6C29rYuCyiDcqVNY,3857
 upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
-upgini/utils/datetime_utils.py,sha256=
-upgini/utils/deduplicate_utils.py,sha256=
-upgini/utils/display_utils.py,sha256=
+upgini/utils/datetime_utils.py,sha256=l85UzSQLhtMeI2G6m-m8y8bCColCLSXNHb2-G6fKpLM,16988
+upgini/utils/deduplicate_utils.py,sha256=6czbn1q0p-lOmrNvbAzueBpDHmfIP4TfV4poWqbjX5w,11255
+upgini/utils/display_utils.py,sha256=p6o0VlYtGpU6bXv3B-fjQM9PeZEkl05OylHXSRyP0us,13219
 upgini/utils/email_utils.py,sha256=pZ2vCfNxLIPUhxr0-OlABNXm12jjU44isBk8kGmqQzA,5277
 upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
-upgini/utils/feature_info.py,sha256=
+upgini/utils/feature_info.py,sha256=SQTRbSxJDkh2G2c0KGBmOv8f69gVzWbTtcXn0_2Qb-8,7945
 upgini/utils/features_validator.py,sha256=A_3AX7X5u5AH7RLgkTiS6dHxaOiq5vm8w4ijQWLGcMY,4871
 upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
 upgini/utils/hash_utils.py,sha256=mP2yHyzvDNdpa5g3B4MHzulxBeEz_ZSoGl1YF_VnAyE,5538
@@ -74,7 +74,7 @@ upgini/utils/target_utils.py,sha256=GCPn4QeJ83JJ_vyBJ3IhY5fyIRkLC9q9BE59S2FRO1I,
 upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
 upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
 upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
-upgini-1.2.124.dist-info/METADATA,sha256=
-upgini-1.2.124.dist-info/WHEEL,sha256=
-upgini-1.2.124.dist-info/licenses/LICENSE,sha256=
-upgini-1.2.124.dist-info/RECORD,,
+upgini-1.2.127.dist-info/METADATA,sha256=KaZiSMDjzxqjhOoh3zY_EH9-kwLTMy71Us_ge2j-YyM,50781
+upgini-1.2.127.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+upgini-1.2.127.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
+upgini-1.2.127.dist-info/RECORD,,
{upgini-1.2.124.dist-info → upgini-1.2.127.dist-info}/WHEEL: file without changes
{upgini-1.2.124.dist-info → upgini-1.2.127.dist-info}/licenses/LICENSE: file without changes