upgini 1.2.154a4146.dev9__tar.gz → 1.2.155.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/PKG-INFO +2 -2
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/pyproject.toml +1 -1
- upgini-1.2.155.dev1/src/upgini/__about__.py +1 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/data_source/data_source_publisher.py +4 -2
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/features_enricher.py +11 -6
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/metadata.py +4 -3
- upgini-1.2.154a4146.dev9/src/upgini/__about__.py +0 -1
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/.gitignore +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/LICENSE +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/README.md +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/__init__.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/ads.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/all_operators.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/date.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/operator.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/timeseries/__init__.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/timeseries/base.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/timeseries/cross.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/timeseries/delta.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/timeseries/lag.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/timeseries/roll.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/timeseries/trend.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/timeseries/volatility.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/utils.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/dataset.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/errors.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/http.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/mdc/context.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/metrics.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/normalizer/normalize_utils.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/sampler/base.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/search_task.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/spinner.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/Roboto-Regular.ttf +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/config.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/feature_info.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/format.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/hash_utils.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/mstats.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/one_hot_encoder.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/psi.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/sample_utils.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/sort.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/ts_utils.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/version_validator.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.2.154a4146.dev9
|
|
3
|
+
Version: 1.2.155.dev1
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
|
@@ -34,7 +34,7 @@ Requires-Dist: more-itertools==10.7.0
|
|
|
34
34
|
Requires-Dist: numpy<3.0.0,>=1.19.0
|
|
35
35
|
Requires-Dist: pandas<3.0.0,>=1.1.0
|
|
36
36
|
Requires-Dist: psutil>=5.9.0
|
|
37
|
-
Requires-Dist: pyarrow==
|
|
37
|
+
Requires-Dist: pyarrow==23.0.1
|
|
38
38
|
Requires-Dist: pydantic<3.0.0,>1.0.0
|
|
39
39
|
Requires-Dist: pyjwt>=2.8.0
|
|
40
40
|
Requires-Dist: python-bidi==0.4.2
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.2.155.dev1"
|
{upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/data_source/data_source_publisher.py
RENAMED
|
@@ -540,7 +540,8 @@ class DataSourcePublisher:
|
|
|
540
540
|
)
|
|
541
541
|
|
|
542
542
|
@staticmethod
|
|
543
|
-
def build_place_request(
|
|
543
|
+
def build_place_request(
|
|
544
|
+
data_table_uri: str,
|
|
544
545
|
search_keys: Dict[str, SearchKey],
|
|
545
546
|
update_frequency: Union[
|
|
546
547
|
Literal["Daily"], Literal["Weekly"], Literal["Monthly"], Literal["Quarterly"], Literal["Annually"]
|
|
@@ -564,7 +565,8 @@ class DataSourcePublisher:
|
|
|
564
565
|
force_percentile_generation: Optional[List[str]] = None,
|
|
565
566
|
_force_generation=False,
|
|
566
567
|
_silent=False,
|
|
567
|
-
ads_hints: Optional[list[AdsHint]] = None
|
|
568
|
+
ads_hints: Optional[list[AdsHint]] = None,
|
|
569
|
+
) -> Dict:
|
|
568
570
|
if data_table_uri is None or not data_table_uri.startswith("bq://"):
|
|
569
571
|
raise ValidationError(
|
|
570
572
|
"Unsupported data table uri. It should looks like bq://projectId.datasetId.tableId"
|
|
@@ -1772,7 +1772,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1772
1772
|
}
|
|
1773
1773
|
date_column = self._get_date_column(search_keys)
|
|
1774
1774
|
if date_column:
|
|
1775
|
-
cat_features.discard(date_column)
|
|
1775
|
+
cat_features.discard(to_renamed(date_column))
|
|
1776
1776
|
return sorted(cat_features)
|
|
1777
1777
|
|
|
1778
1778
|
def _resolve_client_features_in_sampled(
|
|
@@ -1878,7 +1878,8 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1878
1878
|
|
|
1879
1879
|
file_meta = self._search_task.get_file_metadata(self._get_trace_id())
|
|
1880
1880
|
fit_dropped_features = list(self.fit_dropped_features or file_meta.droppedColumns or [])
|
|
1881
|
-
renamed_to_original =
|
|
1881
|
+
renamed_to_original = dict(columns_renaming)
|
|
1882
|
+
original_to_renamed = {original: hashed for hashed, original in columns_renaming.items()}
|
|
1882
1883
|
excluding_search_keys_original = [renamed_to_original.get(sk, sk) for sk in excluding_search_keys]
|
|
1883
1884
|
|
|
1884
1885
|
excluded_client_columns = (
|
|
@@ -1886,7 +1887,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1886
1887
|
+ fit_dropped_features
|
|
1887
1888
|
+ [DateTimeConverter.DATETIME_COL, SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID]
|
|
1888
1889
|
)
|
|
1889
|
-
excluded_client_columns_renamed = {
|
|
1890
|
+
excluded_client_columns_renamed = {original_to_renamed.get(c, c) for c in excluded_client_columns}
|
|
1890
1891
|
|
|
1891
1892
|
# Client columns for enriched metrics (respects select_features).
|
|
1892
1893
|
client_features = [
|
|
@@ -1937,9 +1938,13 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1937
1938
|
)
|
|
1938
1939
|
|
|
1939
1940
|
fitting_X = X_sorted[baseline_client_features_in_sampled].copy()
|
|
1940
|
-
|
|
1941
|
-
|
|
1942
|
-
|
|
1941
|
+
enriched_columns_for_fitting = []
|
|
1942
|
+
seen_enriched_columns: set[str] = set()
|
|
1943
|
+
for column in enriched_client_features_in_sampled + existing_selected_enriched_features:
|
|
1944
|
+
if column not in seen_enriched_columns:
|
|
1945
|
+
enriched_columns_for_fitting.append(column)
|
|
1946
|
+
seen_enriched_columns.add(column)
|
|
1947
|
+
fitting_enriched_X = enriched_X_sorted[enriched_columns_for_fitting].copy()
|
|
1943
1948
|
|
|
1944
1949
|
renamed_generate_features = [columns_renaming.get(c, c) for c in (self.generate_features or [])]
|
|
1945
1950
|
renamed_client_cat_features = [columns_renaming.get(c, c) for c in (client_cat_features or [])]
|
|
@@ -397,12 +397,13 @@ class AddInfo(BaseModel):
|
|
|
397
397
|
|
|
398
398
|
class AdsHintType(str, Enum):
|
|
399
399
|
# BW table column must have type `DATE`
|
|
400
|
-
DATE_CLUSTER_KEY = "DATE_CLUSTER_KEY"
|
|
401
|
-
DATE_CLUSTER_SYNTHETIC = "DATE_CLUSTER_SYNTHETIC"
|
|
400
|
+
DATE_CLUSTER_KEY = "DATE_CLUSTER_KEY" # cluster column also plays role of ADS search key
|
|
401
|
+
DATE_CLUSTER_SYNTHETIC = "DATE_CLUSTER_SYNTHETIC" # cluster column used only for partition elimination
|
|
402
|
+
|
|
402
403
|
|
|
403
404
|
class AdsHint(BaseModel):
|
|
404
405
|
adsHintType: AdsHintType
|
|
405
406
|
hintColumnName: str
|
|
406
407
|
# must be set if registered ADS defined as view and has clustered column in underlying table
|
|
407
408
|
# format: myDatasetId.myTableUsedInViewWithClusterField
|
|
408
|
-
fullyQualifiedTableName: Optional[str] = None
|
|
409
|
+
fullyQualifiedTableName: Optional[str] = None
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.2.154a4146.dev9"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/resource_bundle/strings.properties
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.154a4146.dev9 → upgini-1.2.155.dev1}/src/upgini/utils/base_search_key_detector.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|