upgini 1.1.263a1__tar.gz → 1.1.264__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {upgini-1.1.263a1/src/upgini.egg-info → upgini-1.1.264}/PKG-INFO +1 -1
- {upgini-1.1.263a1 → upgini-1.1.264}/setup.py +1 -1
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/autofe/all_operands.py +3 -0
- upgini-1.1.264/src/upgini/autofe/date.py +53 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/autofe/feature.py +1 -1
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/autofe/operand.py +2 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/autofe/unary.py +15 -8
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/autofe/vector.py +5 -3
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/features_enricher.py +13 -22
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/resource_bundle/strings.properties +2 -2
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/utils/datetime_utils.py +49 -1
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/utils/deduplicate_utils.py +18 -61
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/utils/target_utils.py +2 -6
- {upgini-1.1.263a1 → upgini-1.1.264/src/upgini.egg-info}/PKG-INFO +1 -1
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini.egg-info/SOURCES.txt +2 -0
- upgini-1.1.264/tests/test_autofe_operands.py +27 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/tests/test_datetime_utils.py +30 -2
- {upgini-1.1.263a1 → upgini-1.1.264}/tests/test_features_enricher.py +2 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/LICENSE +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/README.md +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/pyproject.toml +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/setup.cfg +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/__init__.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/ads.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/dataset.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/errors.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/fingerprint.js +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/http.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/mdc/context.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/metadata.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/metrics.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/normalizer/phone_normalizer.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/sampler/base.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/search_task.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/spinner.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/utils/format.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini/version_validator.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini.egg-info/dependency_links.txt +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini.egg-info/requires.txt +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/src/upgini.egg-info/top_level.txt +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/tests/test_binary_dataset.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/tests/test_blocked_time_series.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/tests/test_categorical_dataset.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/tests/test_continuous_dataset.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/tests/test_country_utils.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/tests/test_custom_loss_utils.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/tests/test_email_utils.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/tests/test_etalon_validation.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/tests/test_metrics.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/tests/test_phone_utils.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/tests/test_postal_code_utils.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/tests/test_target_utils.py +0 -0
- {upgini-1.1.263a1 → upgini-1.1.264}/tests/test_widget.py +0 -0
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from typing import Dict
|
|
2
|
+
from upgini.autofe.date import DateDiff, DateDiffType2
|
|
2
3
|
from upgini.autofe.groupby import GroupByThenAgg, GroupByThenRank
|
|
3
4
|
from upgini.autofe.operand import Operand
|
|
4
5
|
from upgini.autofe.unary import Abs, Log, Residual, Sqrt, Square, Sigmoid, Floor, Freq
|
|
@@ -35,6 +36,8 @@ ALL_OPERANDS: Dict[str, Operand] = {
|
|
|
35
36
|
Operand(name="GroupByThenNUnique", output_type="int", is_vectorizable=True, is_grouping=True),
|
|
36
37
|
Operand(name="GroupByThenFreq", output_type="float", is_grouping=True),
|
|
37
38
|
Sim(),
|
|
39
|
+
DateDiff(),
|
|
40
|
+
DateDiffType2(),
|
|
38
41
|
]
|
|
39
42
|
}
|
|
40
43
|
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from typing import Optional, Union
|
|
2
|
+
import numpy as np
|
|
3
|
+
import pandas as pd
|
|
4
|
+
|
|
5
|
+
from upgini.autofe.operand import PandasOperand
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class DateDiffMixin:
    """Shared settings and date parsing for the date-difference operands."""

    # Unit code the operands pass to ``np.timedelta64`` when scaling a difference.
    diff_unit: str = "D"
    # ``unit`` argument forwarded to ``pd.to_datetime`` for the left operand.
    left_unit: Optional[str] = None
    # ``unit`` argument forwarded to ``pd.to_datetime`` for the right operand.
    right_unit: Optional[str] = None

    def _convert_to_date(
        self, x: Union[pd.DataFrame, pd.Series], unit: Optional[str]
    ) -> Union[pd.DataFrame, pd.Series]:
        """Parse ``x`` into datetimes with ``pd.to_datetime``.

        Args:
            x: A single column, or a frame whose every column holds dates.
            unit: Passed through as ``pd.to_datetime(..., unit=unit)``.

        Returns:
            An object of the same kind as ``x`` holding the parsed values.
        """
        if isinstance(x, pd.DataFrame):
            # Convert column by column rather than row by row: this costs one
            # ``to_datetime`` call per column instead of one per row, and each
            # column is parsed with its own inferred format.
            return x.apply(lambda column: self._convert_to_date(column, unit))

        return pd.to_datetime(x, unit=unit)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class DateDiff(PandasOperand, DateDiffMixin):
    """Binary operand: ``left - right`` expressed in ``diff_unit`` units, negatives blanked."""

    name = "date_diff"
    is_binary = True
    has_symmetry_importance = True

    def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
        """Return the elapsed time from ``right`` to ``left``.

        Both inputs are parsed with ``_convert_to_date`` using the operand's
        ``left_unit`` / ``right_unit``. Entries where ``left`` precedes
        ``right`` (a negative difference) are replaced with a missing value.
        """
        left_dates = self._convert_to_date(left, self.left_unit)
        right_dates = self._convert_to_date(right, self.right_unit)
        delta = left_dates - right_dates
        elapsed = delta / np.timedelta64(1, self.diff_unit)
        return self.__replace_negative(elapsed)

    def __replace_negative(self, x: Union[pd.DataFrame, pd.Series]):
        """Overwrite negative entries of ``x`` in place with ``None`` and return ``x``."""
        negative = x < 0
        x[negative] = None
        return x
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class DateDiffType2(PandasOperand, DateDiffMixin):
    """Binary operand: time from ``left`` to the next anniversary of ``right``.

    ``right`` is moved into the calendar year of ``left``; if that shifted
    date falls before ``left`` it is pushed one more year forward. The result
    is the gap between that date and ``left`` in ``diff_unit`` units.
    """

    name = "date_diff_type2"
    is_binary = True
    has_symmetry_importance = True
    is_vectorizable = False

    def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
        """Compute the gap between ``left`` and the upcoming anniversary of ``right``."""
        left = self._convert_to_date(left, self.left_unit)
        right = self._convert_to_date(right, self.right_unit)

        def years_to_offset(y):
            # A missing year difference (either date missing) must stay missing.
            if np.isnan(y):
                return np.datetime64("NaT")
            return pd.tseries.offsets.DateOffset(years=y)

        # Shift every ``right`` date into the year of the matching ``left`` date.
        future = right + (left.dt.year - right.dt.year).apply(years_to_offset)

        # Anniversaries that already passed this year roll over to next year.
        already_passed = future < left
        before = future[already_passed]
        future[already_passed] = before + pd.tseries.offsets.DateOffset(years=1)

        return (future - left) / np.timedelta64(1, self.diff_unit)
|
|
@@ -305,7 +305,7 @@ class FeatureGroup:
|
|
|
305
305
|
grouped_features = []
|
|
306
306
|
|
|
307
307
|
def groupby_func(f: Feature) -> Tuple[Operand, Union[Column, Feature]]:
|
|
308
|
-
return (f.op, f.children[0
|
|
308
|
+
return (f.op, f.children[0 if not f.op.is_vectorizable else f.op.group_index])
|
|
309
309
|
|
|
310
310
|
for op_child, features in itertools.groupby(candidates, groupby_func):
|
|
311
311
|
op, main_child = op_child
|
|
@@ -73,6 +73,8 @@ class PandasOperand(Operand, abc.ABC):
|
|
|
73
73
|
|
|
74
74
|
|
|
75
75
|
class VectorizableMixin(Operand):
|
|
76
|
+
group_index: int = 1
|
|
77
|
+
|
|
76
78
|
def validate_calculation(self, input_columns: List[str], **kwargs) -> Tuple[str, List[str]]:
|
|
77
79
|
if not kwargs.get(MAIN_COLUMN):
|
|
78
80
|
raise ValueError(f"Expected argument {MAIN_COLUMN} for grouping operator {self.name} not found")
|
|
@@ -1,12 +1,13 @@
|
|
|
1
|
-
from upgini.autofe.operand import PandasOperand
|
|
1
|
+
from upgini.autofe.operand import PandasOperand, VectorizableMixin
|
|
2
2
|
import numpy as np
|
|
3
3
|
import pandas as pd
|
|
4
4
|
|
|
5
5
|
|
|
6
|
-
class Abs(PandasOperand):
|
|
6
|
+
class Abs(PandasOperand, VectorizableMixin):
|
|
7
7
|
name = "abs"
|
|
8
8
|
is_unary = True
|
|
9
9
|
is_vectorizable = True
|
|
10
|
+
group_index = 0
|
|
10
11
|
|
|
11
12
|
def calculate_unary(self, data: pd.Series) -> pd.Series:
|
|
12
13
|
return data.abs()
|
|
@@ -15,11 +16,12 @@ class Abs(PandasOperand):
|
|
|
15
16
|
return data.abs()
|
|
16
17
|
|
|
17
18
|
|
|
18
|
-
class Log(PandasOperand):
|
|
19
|
+
class Log(PandasOperand, VectorizableMixin):
|
|
19
20
|
name = "log"
|
|
20
21
|
is_unary = True
|
|
21
22
|
is_vectorizable = True
|
|
22
23
|
output_type = "float"
|
|
24
|
+
group_index = 0
|
|
23
25
|
|
|
24
26
|
def calculate_unary(self, data: pd.Series) -> pd.Series:
|
|
25
27
|
return self._round_value(np.log(np.abs(data.replace(0, np.nan))), 10)
|
|
@@ -28,11 +30,12 @@ class Log(PandasOperand):
|
|
|
28
30
|
return self._round_value(np.log(data.replace(0, np.nan).abs()), 10)
|
|
29
31
|
|
|
30
32
|
|
|
31
|
-
class Sqrt(PandasOperand):
|
|
33
|
+
class Sqrt(PandasOperand, VectorizableMixin):
|
|
32
34
|
name = "sqrt"
|
|
33
35
|
is_unary = True
|
|
34
36
|
is_vectorizable = True
|
|
35
37
|
output_type = "float"
|
|
38
|
+
group_index = 0
|
|
36
39
|
|
|
37
40
|
def calculate_unary(self, data: pd.Series) -> pd.Series:
|
|
38
41
|
return self._round_value(np.sqrt(np.abs(data)))
|
|
@@ -41,10 +44,11 @@ class Sqrt(PandasOperand):
|
|
|
41
44
|
return self._round_value(np.sqrt(data.abs()))
|
|
42
45
|
|
|
43
46
|
|
|
44
|
-
class Square(PandasOperand):
|
|
47
|
+
class Square(PandasOperand, VectorizableMixin):
|
|
45
48
|
name = "square"
|
|
46
49
|
is_unary = True
|
|
47
50
|
is_vectorizable = True
|
|
51
|
+
group_index = 0
|
|
48
52
|
|
|
49
53
|
def calculate_unary(self, data: pd.Series) -> pd.Series:
|
|
50
54
|
return np.square(data)
|
|
@@ -53,11 +57,12 @@ class Square(PandasOperand):
|
|
|
53
57
|
return np.square(data)
|
|
54
58
|
|
|
55
59
|
|
|
56
|
-
class Sigmoid(PandasOperand):
|
|
60
|
+
class Sigmoid(PandasOperand, VectorizableMixin):
|
|
57
61
|
name = "sigmoid"
|
|
58
62
|
is_unary = True
|
|
59
63
|
is_vectorizable = True
|
|
60
64
|
output_type = "float"
|
|
65
|
+
group_index = 0
|
|
61
66
|
|
|
62
67
|
def calculate_unary(self, data: pd.Series) -> pd.Series:
|
|
63
68
|
return self._round_value(1 / (1 + np.exp(-data)))
|
|
@@ -66,12 +71,13 @@ class Sigmoid(PandasOperand):
|
|
|
66
71
|
return self._round_value(1 / (1 + np.exp(-data)))
|
|
67
72
|
|
|
68
73
|
|
|
69
|
-
class Floor(PandasOperand):
|
|
74
|
+
class Floor(PandasOperand, VectorizableMixin):
|
|
70
75
|
name = "floor"
|
|
71
76
|
is_unary = True
|
|
72
77
|
is_vectorizable = True
|
|
73
78
|
output_type = "int"
|
|
74
79
|
input_type = "continuous"
|
|
80
|
+
group_index = 0
|
|
75
81
|
|
|
76
82
|
def calculate_unary(self, data: pd.Series) -> pd.Series:
|
|
77
83
|
return np.floor(data)
|
|
@@ -80,11 +86,12 @@ class Floor(PandasOperand):
|
|
|
80
86
|
return np.floor(data)
|
|
81
87
|
|
|
82
88
|
|
|
83
|
-
class Residual(PandasOperand):
|
|
89
|
+
class Residual(PandasOperand, VectorizableMixin):
|
|
84
90
|
name = "residual"
|
|
85
91
|
is_unary = True
|
|
86
92
|
is_vectorizable = True
|
|
87
93
|
input_type = "continuous"
|
|
94
|
+
group_index = 0
|
|
88
95
|
|
|
89
96
|
def calculate_unary(self, data: pd.Series) -> pd.Series:
|
|
90
97
|
return data - np.floor(data)
|
|
@@ -1,20 +1,22 @@
|
|
|
1
1
|
from typing import List
|
|
2
2
|
import pandas as pd
|
|
3
|
-
from upgini.autofe.operand import PandasOperand
|
|
3
|
+
from upgini.autofe.operand import PandasOperand, VectorizableMixin
|
|
4
4
|
|
|
5
5
|
|
|
6
|
-
class Mean(PandasOperand):
|
|
6
|
+
class Mean(PandasOperand, VectorizableMixin):
|
|
7
7
|
name = "mean"
|
|
8
8
|
output_type = "float"
|
|
9
9
|
is_vector = True
|
|
10
|
+
group_index = 0
|
|
10
11
|
|
|
11
12
|
def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
|
|
12
13
|
return pd.DataFrame(data).T.fillna(0).mean(axis=1)
|
|
13
14
|
|
|
14
15
|
|
|
15
|
-
class Sum(PandasOperand):
|
|
16
|
+
class Sum(PandasOperand, VectorizableMixin):
|
|
16
17
|
name = "sum"
|
|
17
18
|
is_vector = True
|
|
19
|
+
group_index = 0
|
|
18
20
|
|
|
19
21
|
def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
|
|
20
22
|
return pd.DataFrame(data).T.fillna(0).sum(axis=1)
|
|
@@ -70,6 +70,7 @@ from upgini.utils.datetime_utils import (
|
|
|
70
70
|
DateTimeSearchKeyConverter,
|
|
71
71
|
is_blocked_time_series,
|
|
72
72
|
is_time_series,
|
|
73
|
+
validate_dates_distribution,
|
|
73
74
|
)
|
|
74
75
|
from upgini.utils.deduplicate_utils import (
|
|
75
76
|
clean_full_duplicates,
|
|
@@ -1685,9 +1686,6 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1685
1686
|
df = validated_X.copy()
|
|
1686
1687
|
|
|
1687
1688
|
df[TARGET] = validated_y
|
|
1688
|
-
|
|
1689
|
-
df = clean_full_duplicates(df, logger=self.logger, silent=True, bundle=self.bundle)
|
|
1690
|
-
|
|
1691
1689
|
num_samples = _num_samples(df)
|
|
1692
1690
|
if num_samples > Dataset.FIT_SAMPLE_THRESHOLD:
|
|
1693
1691
|
self.logger.info(f"Downsampling from {num_samples} to {Dataset.FIT_SAMPLE_ROWS}")
|
|
@@ -1922,7 +1920,6 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1922
1920
|
|
|
1923
1921
|
meaning_types = {col: key.value for col, key in search_keys.items()}
|
|
1924
1922
|
non_keys_columns = [column for column in df.columns if column not in search_keys.keys()]
|
|
1925
|
-
# Don't pass
|
|
1926
1923
|
if email_converted_to_hem:
|
|
1927
1924
|
non_keys_columns.append(email_column)
|
|
1928
1925
|
|
|
@@ -1944,7 +1941,6 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1944
1941
|
if add_fit_system_record_id:
|
|
1945
1942
|
df = self.__add_fit_system_record_id(df, dict(), search_keys)
|
|
1946
1943
|
df = df.rename(columns={SYSTEM_RECORD_ID: SORT_ID})
|
|
1947
|
-
non_keys_columns.append(SORT_ID)
|
|
1948
1944
|
|
|
1949
1945
|
columns_for_system_record_id = sorted(list(search_keys.keys()) + (original_features_for_transform or []))
|
|
1950
1946
|
|
|
@@ -2221,6 +2217,10 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2221
2217
|
self.fit_search_keys = self.search_keys.copy()
|
|
2222
2218
|
self.fit_search_keys = self.__prepare_search_keys(validated_X, self.fit_search_keys, is_demo_dataset)
|
|
2223
2219
|
|
|
2220
|
+
validate_dates_distribution(
|
|
2221
|
+
validated_X, self.fit_search_keys, self.logger, self.bundle, self.warning_counter
|
|
2222
|
+
)
|
|
2223
|
+
|
|
2224
2224
|
has_date = self._get_date_column(self.fit_search_keys) is not None
|
|
2225
2225
|
model_task_type = self.model_task_type or define_task(validated_y, has_date, self.logger)
|
|
2226
2226
|
self._validate_binary_observations(validated_y, model_task_type)
|
|
@@ -2883,35 +2883,26 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2883
2883
|
|
|
2884
2884
|
# order by date and idempotent order by other keys
|
|
2885
2885
|
if self.cv not in [CVType.time_series, CVType.blocked_time_series]:
|
|
2886
|
-
sort_exclude_columns = [original_order_name, ORIGINAL_INDEX, EVAL_SET_INDEX, TARGET, "__target"]
|
|
2887
2886
|
if DateTimeSearchKeyConverter.DATETIME_COL in df.columns:
|
|
2888
2887
|
date_column = DateTimeSearchKeyConverter.DATETIME_COL
|
|
2889
|
-
sort_exclude_columns.append(self._get_date_column(search_keys))
|
|
2890
2888
|
else:
|
|
2891
2889
|
date_column = self._get_date_column(search_keys)
|
|
2892
2890
|
sort_columns = [date_column] if date_column is not None else []
|
|
2893
2891
|
|
|
2894
|
-
|
|
2892
|
+
other_search_keys = sorted(
|
|
2895
2893
|
[
|
|
2896
|
-
|
|
2897
|
-
for
|
|
2898
|
-
if
|
|
2899
|
-
and
|
|
2900
|
-
and df[
|
|
2894
|
+
sk
|
|
2895
|
+
for sk, key_type in search_keys.items()
|
|
2896
|
+
if key_type not in [SearchKey.DATE, SearchKey.DATETIME]
|
|
2897
|
+
and sk in df.columns
|
|
2898
|
+
and df[sk].nunique() > 1 # don't use constant keys for hash
|
|
2901
2899
|
]
|
|
2902
|
-
# [
|
|
2903
|
-
# sk
|
|
2904
|
-
# for sk, key_type in search_keys.items()
|
|
2905
|
-
# if key_type not in [SearchKey.DATE, SearchKey.DATETIME]
|
|
2906
|
-
# and sk in df.columns
|
|
2907
|
-
# and df[sk].nunique() > 1 # don't use constant keys for hash
|
|
2908
|
-
# ]
|
|
2909
2900
|
)
|
|
2910
2901
|
|
|
2911
2902
|
search_keys_hash = "search_keys_hash"
|
|
2912
|
-
if len(
|
|
2903
|
+
if len(other_search_keys) > 0:
|
|
2913
2904
|
sort_columns.append(search_keys_hash)
|
|
2914
|
-
df[search_keys_hash] = pd.util.hash_pandas_object(df[
|
|
2905
|
+
df[search_keys_hash] = pd.util.hash_pandas_object(df[sorted(other_search_keys)], index=False)
|
|
2915
2906
|
|
|
2916
2907
|
df = df.sort_values(by=sort_columns)
|
|
2917
2908
|
|
|
@@ -111,6 +111,7 @@ x_is_empty=X is empty
|
|
|
111
111
|
y_is_empty=y is empty
|
|
112
112
|
x_contains_reserved_column_name=Column name {} is reserved. Please rename column and try again
|
|
113
113
|
missing_generate_feature=\nWARNING: Feature {} specified in `generate_features` is not present in input columns: {}
|
|
114
|
+
x_unstable_by_date=\nWARNING: Your training sample is unstable in number of rows per date. It is recommended to redesign the training sample.
|
|
114
115
|
# eval set validation
|
|
115
116
|
unsupported_type_eval_set=Unsupported type of eval_set: {}. It should be list of tuples with two elements: X and y
|
|
116
117
|
eval_set_invalid_tuple_size=eval_set contains a tuple of size {}. It should contain only pairs of X and y
|
|
@@ -145,8 +146,7 @@ dataset_too_many_rows_registered=X rows limit for transform is {}. Please sample
|
|
|
145
146
|
dataset_empty_column_names=Some column names are empty. Add names please
|
|
146
147
|
dataset_full_duplicates=\nWARNING: {:.5f}% of the rows are fully duplicated
|
|
147
148
|
dataset_diff_target_duplicates=\nWARNING: {:.4f}% of rows ({}) in X and eval_set are duplicates with different y values. These rows will be deleted as incorrect\nIncorrect row indexes: {}
|
|
148
|
-
|
|
149
|
-
dataset_eval_diff_target_duplicates_fintech=\nWARNING: {:.4f}% of rows ({}) in eval{} X are duplicates, not taking into consideration dates, IP addresses and features from the training set, but have different y values. These rows have been removed to optimize search results.\nRemoved row indexes: {}
|
|
149
|
+
dataset_diff_target_duplicates_fintech=\nWARNING: {:.4f}% of rows ({}) in X and eval_set are duplicates, not taking into consideration dates, IP addresses and features from the training set, but have different y values. These rows have been removed to optimize search results.\nRemoved row indexes: {}
|
|
150
150
|
dataset_drop_old_dates=\nWARNING: We don't have data before '2000-01-01' and removed all earlier records from the search dataset
|
|
151
151
|
dataset_all_dates_old=There is empty train dataset after removing data before '2000-01-01'
|
|
152
152
|
dataset_invalid_target_type=Unexpected dtype of target for binary task type: {}. Expected int or bool
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import datetime
|
|
2
2
|
import logging
|
|
3
3
|
import re
|
|
4
|
-
from typing import List, Optional
|
|
4
|
+
from typing import Dict, List, Optional
|
|
5
5
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
import pandas as pd
|
|
@@ -9,7 +9,9 @@ from dateutil.relativedelta import relativedelta
|
|
|
9
9
|
from pandas.api.types import is_numeric_dtype, is_period_dtype, is_string_dtype
|
|
10
10
|
|
|
11
11
|
from upgini.errors import ValidationError
|
|
12
|
+
from upgini.metadata import SearchKey
|
|
12
13
|
from upgini.resource_bundle import ResourceBundle, get_custom_bundle
|
|
14
|
+
from upgini.utils.warning_counter import WarningCounter
|
|
13
15
|
|
|
14
16
|
DATE_FORMATS = [
|
|
15
17
|
"%Y-%m-%d",
|
|
@@ -225,3 +227,49 @@ def is_blocked_time_series(df: pd.DataFrame, date_col: str, search_keys: List[st
|
|
|
225
227
|
|
|
226
228
|
is_diff_less_than_two_columns = grouped.apply(check_differences)
|
|
227
229
|
return is_diff_less_than_two_columns.all()
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def validate_dates_distribution(
    X: pd.DataFrame,
    search_keys: Dict[str, SearchKey],
    logger: Optional[logging.Logger] = None,
    bundle: Optional[ResourceBundle] = None,
    warning_counter: Optional[WarningCounter] = None,
):
    """Warn when the number of rows per date differs between the two halves of the date range.

    The date column is the one declared as ``SearchKey.DATE`` / ``SearchKey.DATETIME``
    in ``search_keys``. If none is declared, the first non-key, non-numeric column
    that ``pd.to_datetime`` can parse is used. Rows are counted per calendar date,
    the dates are sorted and split in two, and the mean count of the later half is
    divided by the mean count of the earlier half. A ratio outside ``[0.8, 1.2]``
    prints and logs the ``x_unstable_by_date`` message and increments
    ``warning_counter``.

    Args:
        X: Training features.
        search_keys: Mapping of column name to its search-key type.
        logger: Receives the warning; when omitted a muted logger is used.
        bundle: Source of the message text; defaults to ``get_custom_bundle()``.
        warning_counter: Incremented once when the warning is raised.

    Returns:
        None. The function only reports; it never modifies ``X``.
    """
    maybe_date_col = None
    # No early exit: when several date keys are declared the last one is used.
    for key, key_type in search_keys.items():
        if key_type in [SearchKey.DATE, SearchKey.DATETIME]:
            maybe_date_col = key

    parsed = None
    if maybe_date_col is None:
        for col in X.columns:
            if col in search_keys:
                continue
            try:
                # Numeric columns always "parse" (as epoch nanoseconds), so without
                # this guard the first numeric feature would be taken for the date
                # column and hide a real date column that follows it.
                if is_numeric_dtype(X[col]):
                    continue
                candidate = pd.to_datetime(X[col])
            except Exception:
                # Best-effort detection: an unparseable column is simply not a date.
                continue
            maybe_date_col = col
            parsed = candidate  # reuse the parse instead of converting twice
            break

    if maybe_date_col is None:
        return

    if parsed is None:
        # NOTE(review): a numeric date key is parsed with pandas' default unit here
        # — confirm whether such keys are normalised before this check runs.
        parsed = pd.to_datetime(X[maybe_date_col])
    dates = parsed.dt.date

    date_counts = dates.value_counts().sort_index()

    half = round(len(date_counts) / 2)
    if half == 0:
        # Zero or one distinct date: there is no earlier half to compare against.
        return

    # ``.iloc`` makes the positional split explicit regardless of index type.
    date_counts_1 = date_counts.iloc[:half]
    date_counts_2 = date_counts.iloc[half:]
    ratio = date_counts_2.mean() / date_counts_1.mean()

    if ratio > 1.2 or ratio < 0.8:
        if warning_counter is not None:
            warning_counter.increment()
        if logger is None:
            logger = logging.getLogger("muted_logger")
            logger.setLevel("FATAL")
        bundle = bundle or get_custom_bundle()
        msg = bundle.get("x_unstable_by_date")
        print(msg)
        logger.warning(msg)
|
|
@@ -3,7 +3,7 @@ from typing import Dict, List, Optional, Union
|
|
|
3
3
|
|
|
4
4
|
import pandas as pd
|
|
5
5
|
|
|
6
|
-
from upgini.metadata import
|
|
6
|
+
from upgini.metadata import SORT_ID, SYSTEM_RECORD_ID, TARGET, ModelTaskType, SearchKey
|
|
7
7
|
from upgini.resource_bundle import ResourceBundle
|
|
8
8
|
from upgini.utils.datetime_utils import DateTimeSearchKeyConverter
|
|
9
9
|
from upgini.utils.target_utils import define_task
|
|
@@ -78,58 +78,20 @@ def remove_fintech_duplicates(
|
|
|
78
78
|
rows_with_diff_target = grouped_by_personal_cols.filter(has_diff_target_within_60_days)
|
|
79
79
|
if len(rows_with_diff_target) > 0:
|
|
80
80
|
unique_keys_to_delete = rows_with_diff_target[personal_cols].drop_duplicates()
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
else:
|
|
96
|
-
# Indices in train and eval_set can be the same so we remove rows from them separately
|
|
97
|
-
train = df.query(f"{EVAL_SET_INDEX} == 0")
|
|
98
|
-
train_rows_to_remove = pd.merge(train.reset_index(), unique_keys_to_delete, on=personal_cols)
|
|
99
|
-
train_rows_to_remove = train_rows_to_remove.set_index(train.index.name or "index")
|
|
100
|
-
train_perc = len(train_rows_to_remove) * 100 / len(train)
|
|
101
|
-
msg = bundle.get("dataset_train_diff_target_duplicates_fintech").format(
|
|
102
|
-
train_perc, len(train_rows_to_remove), train_rows_to_remove.index.to_list()
|
|
103
|
-
)
|
|
104
|
-
if not silent:
|
|
105
|
-
print(msg)
|
|
106
|
-
if logger:
|
|
107
|
-
logger.warning(msg)
|
|
108
|
-
logger.info(f"Train dataset shape before clean fintech duplicates: {train.shape}")
|
|
109
|
-
train = train[~train.index.isin(train_rows_to_remove.index)]
|
|
110
|
-
logger.info(f"Train dataset shape after clean fintech duplicates: {train.shape}")
|
|
111
|
-
|
|
112
|
-
evals = [df.query(f"{EVAL_SET_INDEX} == {i}") for i in df[EVAL_SET_INDEX].unique() if i != 0]
|
|
113
|
-
new_evals = []
|
|
114
|
-
for i, eval in enumerate(evals):
|
|
115
|
-
eval_rows_to_remove = pd.merge(eval.reset_index(), unique_keys_to_delete, on=personal_cols)
|
|
116
|
-
eval_rows_to_remove = eval_rows_to_remove.set_index(eval.index.name or "index")
|
|
117
|
-
eval_perc = len(eval_rows_to_remove) * 100 / len(eval)
|
|
118
|
-
msg = bundle.get("dataset_eval_diff_target_duplicates_fintech").format(
|
|
119
|
-
eval_perc, len(eval_rows_to_remove), i + 1, eval_rows_to_remove.index.to_list()
|
|
120
|
-
)
|
|
121
|
-
if not silent:
|
|
122
|
-
print(msg)
|
|
123
|
-
if logger:
|
|
124
|
-
logger.warning(msg)
|
|
125
|
-
logger.info(f"Eval {i + 1} dataset shape before clean fintech duplicates: {eval.shape}")
|
|
126
|
-
eval = eval[~eval.index.isin(eval_rows_to_remove.index)]
|
|
127
|
-
logger.info(f"Eval {i + 1} dataset shape after clean fintech duplicates: {eval.shape}")
|
|
128
|
-
new_evals.append(eval)
|
|
129
|
-
|
|
130
|
-
logger.info(f"Dataset shape before clean fintech duplicates: {df.shape}")
|
|
131
|
-
df = pd.concat([train] + new_evals)
|
|
132
|
-
logger.info(f"Dataset shape after clean fintech duplicates: {df.shape}")
|
|
81
|
+
rows_to_remove = pd.merge(df.reset_index(), unique_keys_to_delete, on=personal_cols)
|
|
82
|
+
rows_to_remove = rows_to_remove.set_index(df.index.name or "index")
|
|
83
|
+
perc = len(rows_to_remove) * 100 / len(df)
|
|
84
|
+
msg = bundle.get("dataset_diff_target_duplicates_fintech").format(
|
|
85
|
+
perc, len(rows_to_remove), rows_to_remove.index.to_list()
|
|
86
|
+
)
|
|
87
|
+
if not silent:
|
|
88
|
+
print(msg)
|
|
89
|
+
if logger:
|
|
90
|
+
logger.warning(msg)
|
|
91
|
+
logger.info(f"Dataset shape before clean fintech duplicates: {df.shape}")
|
|
92
|
+
df = df[~df.index.isin(rows_to_remove.index)]
|
|
93
|
+
logger.info(f"Dataset shape after clean fintech duplicates: {df.shape}")
|
|
94
|
+
|
|
133
95
|
return df
|
|
134
96
|
|
|
135
97
|
|
|
@@ -139,18 +101,14 @@ def clean_full_duplicates(
|
|
|
139
101
|
nrows = len(df)
|
|
140
102
|
if nrows == 0:
|
|
141
103
|
return df
|
|
142
|
-
# Remove
|
|
104
|
+
# Remove absolute duplicates (exclude system_record_id)
|
|
143
105
|
unique_columns = df.columns.tolist()
|
|
144
106
|
if SYSTEM_RECORD_ID in unique_columns:
|
|
145
107
|
unique_columns.remove(SYSTEM_RECORD_ID)
|
|
146
108
|
if SORT_ID in unique_columns:
|
|
147
109
|
unique_columns.remove(SORT_ID)
|
|
148
|
-
if EVAL_SET_INDEX in unique_columns:
|
|
149
|
-
unique_columns.remove(EVAL_SET_INDEX)
|
|
150
110
|
logger.info(f"Dataset shape before clean duplicates: {df.shape}")
|
|
151
|
-
|
|
152
|
-
# then we keep unique rows in train segment
|
|
153
|
-
df = df.drop_duplicates(subset=unique_columns, keep="first")
|
|
111
|
+
df = df.drop_duplicates(subset=unique_columns)
|
|
154
112
|
logger.info(f"Dataset shape after clean duplicates: {df.shape}")
|
|
155
113
|
nrows_after_full_dedup = len(df)
|
|
156
114
|
share_full_dedup = 100 * (1 - nrows_after_full_dedup / nrows)
|
|
@@ -165,7 +123,7 @@ def clean_full_duplicates(
|
|
|
165
123
|
marked_duplicates = df.duplicated(subset=unique_columns, keep=False)
|
|
166
124
|
if marked_duplicates.sum() > 0:
|
|
167
125
|
dups_indices = df[marked_duplicates].index.to_list()
|
|
168
|
-
nrows_after_tgt_dedup = len(df.drop_duplicates(subset=unique_columns
|
|
126
|
+
nrows_after_tgt_dedup = len(df.drop_duplicates(subset=unique_columns))
|
|
169
127
|
num_dup_rows = nrows_after_full_dedup - nrows_after_tgt_dedup
|
|
170
128
|
share_tgt_dedup = 100 * num_dup_rows / nrows_after_full_dedup
|
|
171
129
|
|
|
@@ -175,7 +133,6 @@ def clean_full_duplicates(
|
|
|
175
133
|
print(msg)
|
|
176
134
|
df = df.drop_duplicates(subset=unique_columns, keep=False)
|
|
177
135
|
logger.info(f"Dataset shape after clean invalid target duplicates: {df.shape}")
|
|
178
|
-
|
|
179
136
|
return df
|
|
180
137
|
|
|
181
138
|
|
|
@@ -132,9 +132,7 @@ def balance_undersample(
|
|
|
132
132
|
class_value = classes[class_idx]
|
|
133
133
|
class_count = vc[class_value]
|
|
134
134
|
sample_strategy[class_value] = min(class_count, quantile25_class_cnt * multiclass_bootstrap_loops)
|
|
135
|
-
sampler = RandomUnderSampler(
|
|
136
|
-
sampling_strategy=sample_strategy, random_state=random_state
|
|
137
|
-
)
|
|
135
|
+
sampler = RandomUnderSampler(sampling_strategy=sample_strategy, random_state=random_state)
|
|
138
136
|
X = df[SYSTEM_RECORD_ID]
|
|
139
137
|
X = X.to_frame(SYSTEM_RECORD_ID)
|
|
140
138
|
new_x, _ = sampler.fit_resample(X, target) # type: ignore
|
|
@@ -153,9 +151,7 @@ def balance_undersample(
|
|
|
153
151
|
minority_class = df[df[target_column] == min_class_value]
|
|
154
152
|
majority_class = df[df[target_column] != min_class_value]
|
|
155
153
|
sample_size = min(len(majority_class), min_sample_threshold - min_class_count)
|
|
156
|
-
sampled_majority_class = majority_class.sample(
|
|
157
|
-
n=sample_size, random_state=random_state
|
|
158
|
-
)
|
|
154
|
+
sampled_majority_class = majority_class.sample(n=sample_size, random_state=random_state)
|
|
159
155
|
resampled_data = df[
|
|
160
156
|
(df[SYSTEM_RECORD_ID].isin(minority_class[SYSTEM_RECORD_ID]))
|
|
161
157
|
| (df[SYSTEM_RECORD_ID].isin(sampled_majority_class[SYSTEM_RECORD_ID]))
|
|
@@ -24,6 +24,7 @@ src/upgini/ads_management/ads_manager.py
|
|
|
24
24
|
src/upgini/autofe/__init__.py
|
|
25
25
|
src/upgini/autofe/all_operands.py
|
|
26
26
|
src/upgini/autofe/binary.py
|
|
27
|
+
src/upgini/autofe/date.py
|
|
27
28
|
src/upgini/autofe/feature.py
|
|
28
29
|
src/upgini/autofe/groupby.py
|
|
29
30
|
src/upgini/autofe/operand.py
|
|
@@ -64,6 +65,7 @@ src/upgini/utils/sklearn_ext.py
|
|
|
64
65
|
src/upgini/utils/target_utils.py
|
|
65
66
|
src/upgini/utils/track_info.py
|
|
66
67
|
src/upgini/utils/warning_counter.py
|
|
68
|
+
tests/test_autofe_operands.py
|
|
67
69
|
tests/test_binary_dataset.py
|
|
68
70
|
tests/test_blocked_time_series.py
|
|
69
71
|
tests/test_categorical_dataset.py
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
from upgini.autofe.date import DateDiff, DateDiffType2
|
|
3
|
+
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from pandas.testing import assert_series_equal
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_date_diff():
|
|
9
|
+
df = pd.DataFrame(
|
|
10
|
+
[[datetime(1993, 12, 10), datetime(2022, 10, 10)], [datetime(2023, 10, 10), datetime(2022, 10, 10)]],
|
|
11
|
+
columns=["date1", "date2"],
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
operand = DateDiff()
|
|
15
|
+
expected_result = pd.Series([10531, None])
|
|
16
|
+
assert_series_equal(operand.calculate_binary(df.date2, df.date1), expected_result)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def test_date_diff_future():
|
|
20
|
+
df = pd.DataFrame(
|
|
21
|
+
[[datetime(1993, 12, 10), datetime(2022, 10, 10)], [datetime(1993, 4, 10), datetime(2022, 10, 10)]],
|
|
22
|
+
columns=["date1", "date2"],
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
operand = DateDiffType2()
|
|
26
|
+
expected_result = pd.Series([61.0, 182.0])
|
|
27
|
+
assert_series_equal(operand.calculate_binary(df.date2, df.date1), expected_result)
|
|
@@ -1,7 +1,13 @@
|
|
|
1
|
-
import pandas as pd
|
|
2
1
|
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
3
|
|
|
4
|
-
from upgini.
|
|
4
|
+
from upgini.metadata import SearchKey
|
|
5
|
+
from upgini.utils.datetime_utils import (
|
|
6
|
+
is_blocked_time_series,
|
|
7
|
+
is_time_series,
|
|
8
|
+
validate_dates_distribution,
|
|
9
|
+
)
|
|
10
|
+
from upgini.utils.warning_counter import WarningCounter
|
|
5
11
|
|
|
6
12
|
pd.set_option("mode.chained_assignment", "raise")
|
|
7
13
|
|
|
@@ -183,3 +189,25 @@ def test_multivariate_time_series():
|
|
|
183
189
|
assert not is_blocked_time_series(df, "date", ["date"])
|
|
184
190
|
|
|
185
191
|
assert is_blocked_time_series(df, "date", ["date", "feature3"])
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def test_validate_dates_distribution():
|
|
195
|
+
df = pd.DataFrame({"date": ["2020-01-01"] * 10 + ["2020-02-01"] * 20 + ["2020-03-01"] * 30 + ["2020-04-01"] * 40})
|
|
196
|
+
warning_counter = WarningCounter()
|
|
197
|
+
validate_dates_distribution(df, {}, warning_counter=warning_counter)
|
|
198
|
+
assert warning_counter.has_warnings()
|
|
199
|
+
|
|
200
|
+
df = pd.DataFrame({"date": ["2020-05-01"] * 10 + ["2020-02-01"] * 20 + ["2020-03-01"] * 30 + ["2020-04-01"] * 40})
|
|
201
|
+
warning_counter = WarningCounter()
|
|
202
|
+
validate_dates_distribution(df, {}, warning_counter=warning_counter)
|
|
203
|
+
assert not warning_counter.has_warnings()
|
|
204
|
+
|
|
205
|
+
df = pd.DataFrame(
|
|
206
|
+
{
|
|
207
|
+
"date2": ["2020-05-01"] * 10 + ["2020-02-01"] * 20 + ["2020-03-01"] * 30 + ["2020-04-01"] * 40,
|
|
208
|
+
"date1": ["2020-01-01"] * 10 + ["2020-02-01"] * 20 + ["2020-03-01"] * 30 + ["2020-04-01"] * 40,
|
|
209
|
+
}
|
|
210
|
+
)
|
|
211
|
+
warning_counter = WarningCounter()
|
|
212
|
+
validate_dates_distribution(df, {"date1": SearchKey.DATE}, warning_counter=warning_counter)
|
|
213
|
+
assert warning_counter.has_warnings()
|
|
@@ -2164,6 +2164,8 @@ def test_idempotent_order_with_imbalanced_dataset(requests_mock: Mocker):
|
|
|
2164
2164
|
|
|
2165
2165
|
actual_result_df = result_wrapper.df.sort_values(by="system_record_id").reset_index(drop=True)
|
|
2166
2166
|
# actual_result_df.to_parquet(expected_result_path)
|
|
2167
|
+
actual_result_df["phone_num_a54a33"] = actual_result_df["phone_num_a54a33"].astype("Int64")
|
|
2168
|
+
actual_result_df["rep_date_f5d6bb"] = actual_result_df["rep_date_f5d6bb"].astype("Int64")
|
|
2167
2169
|
assert_frame_equal(actual_result_df, expected_result_df)
|
|
2168
2170
|
|
|
2169
2171
|
for i in range(5):
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|