upgini 1.1.131a4__py3-none-any.whl → 1.1.132a1__py3-none-any.whl

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

@@ -139,15 +139,16 @@ class FeaturesEnricher(TransformerMixin):
139
139
  raise_validation_error: bool = False,
140
140
  **kwargs,
141
141
  ):
142
- self.api_key = api_key or os.environ.get(UPGINI_API_KEY)
142
+ self._api_key = api_key or os.environ.get(UPGINI_API_KEY)
143
143
  try:
144
- self.rest_client = get_rest_client(endpoint, self.api_key)
144
+ self.rest_client = get_rest_client(endpoint, self._api_key)
145
145
  except UpginiConnectionError as e:
146
146
  print(e)
147
147
  return
148
148
 
149
+ self.logs_enabled = logs_enabled
149
150
  if logs_enabled:
150
- self.logger = LoggerFactory().get_logger(endpoint, self.api_key)
151
+ self.logger = LoggerFactory().get_logger(endpoint, self._api_key)
151
152
  else:
152
153
  self.logger = logging.getLogger()
153
154
  self.logger.setLevel("FATAL")
@@ -174,7 +175,7 @@ class FeaturesEnricher(TransformerMixin):
174
175
  search_task = SearchTask(
175
176
  search_id,
176
177
  endpoint=self.endpoint,
177
- api_key=self.api_key,
178
+ api_key=self._api_key,
178
179
  )
179
180
 
180
181
  print(bundle.get("search_by_task_id_start"))
@@ -235,6 +236,16 @@ class FeaturesEnricher(TransformerMixin):
235
236
  self.__cached_sampled_datasets: Optional[Tuple[pd.DataFrame, pd.DataFrame, pd.Series, Dict, Dict]] = None
236
237
  self.raise_validation_error = raise_validation_error
237
238
 
239
+ def _get_api_key(self):
240
+ return self._api_key
241
+
242
+ def _set_api_key(self, api_key: str):
243
+ self._api_key = api_key
244
+ if self.logs_enabled:
245
+ self.logger = LoggerFactory().get_logger(self.endpoint, self._api_key)
246
+
247
+ api_key = property(_get_api_key, _set_api_key)
248
+
238
249
  def fit(
239
250
  self,
240
251
  X: Union[pd.DataFrame, pd.Series, np.ndarray],
@@ -579,13 +590,14 @@ class FeaturesEnricher(TransformerMixin):
579
590
  finally:
580
591
  self.logger.info(f"Transform elapsed time: {time.time() - start_time}")
581
592
 
582
- if self.country_added:
583
- result = drop_existing_columns(result, COUNTRY)
593
+ if result is not None:
594
+ if self.country_added:
595
+ result = drop_existing_columns(result, COUNTRY)
584
596
 
585
- if keep_input:
586
- return result
587
- else:
588
- return drop_existing_columns(result, X.columns)
597
+ if keep_input:
598
+ return result
599
+ else:
600
+ return drop_existing_columns(result, X.columns)
589
601
 
590
602
  def calculate_metrics(
591
603
  self,
@@ -912,13 +924,21 @@ class FeaturesEnricher(TransformerMixin):
912
924
  def _has_features_with_commercial_schema(
913
925
  self, commercial_schema: str, exclude_features_sources: Optional[List[str]]
914
926
  ) -> bool:
927
+ return len(self._get_features_with_commercial_schema(commercial_schema, exclude_features_sources)) > 0
928
+
929
+ def _get_features_with_commercial_schema(
930
+ self, commercial_schema: str, exclude_features_sources: Optional[List[str]]
931
+ ) -> List[str]:
915
932
  if exclude_features_sources:
916
933
  filtered_features_info = self.features_info[
917
934
  ~self.features_info[bundle.get("features_info_name")].isin(exclude_features_sources)
918
935
  ]
919
936
  else:
920
937
  filtered_features_info = self.features_info
921
- return (filtered_features_info[bundle.get("features_info_commercial_schema")] == commercial_schema).any()
938
+ return list(filtered_features_info.loc[
939
+ filtered_features_info[bundle.get("features_info_commercial_schema")] == commercial_schema,
940
+ bundle.get("features_info_name"),
941
+ ].values)
922
942
 
923
943
  def _has_trial_features(self, exclude_features_sources: Optional[List[str]]) -> bool:
924
944
  return self._has_features_with_commercial_schema(CommercialSchema.TRIAL.value, exclude_features_sources)
@@ -1248,7 +1268,6 @@ class FeaturesEnricher(TransformerMixin):
1248
1268
  msg = bundle.get("transform_with_trial_features")
1249
1269
  self.logger.warn(msg)
1250
1270
  print(msg)
1251
- return None
1252
1271
 
1253
1272
  columns_to_drop = [c for c in validated_X.columns if c in self.feature_names_]
1254
1273
  if len(columns_to_drop) > 0:
@@ -1493,7 +1512,8 @@ class FeaturesEnricher(TransformerMixin):
1493
1512
  if is_demo_dataset:
1494
1513
  msg = bundle.get("demo_dataset_info")
1495
1514
  self.logger.info(msg)
1496
- print(msg)
1515
+ if not self.__is_registered:
1516
+ print(msg)
1497
1517
 
1498
1518
  if self.generate_features is not None and len(self.generate_features) > 0:
1499
1519
  x_columns = list(validated_X.columns)
upgini/http.py CHANGED
@@ -11,8 +11,8 @@ from http.client import HTTPConnection
11
11
  from json import dumps
12
12
  from typing import Dict, List, Optional
13
13
  from urllib.parse import urljoin
14
- import pandas as pd
15
14
 
15
+ import pandas as pd
16
16
  import requests
17
17
  from pydantic import BaseModel
18
18
  from pythonjsonlogger import jsonlogger
@@ -355,7 +355,9 @@ class _RestClient:
355
355
  digest = md5_hash.hexdigest()
356
356
  metadata_with_md5 = metadata.copy(update={"checksumMD5": digest})
357
357
 
358
- digest_sha256 = hashlib.sha256(pd.util.hash_pandas_object(pd.read_parquet(file_path)).values).hexdigest()
358
+ digest_sha256 = hashlib.sha256(
359
+ pd.util.hash_pandas_object(pd.read_parquet(file_path)).values
360
+ ).hexdigest()
359
361
  metadata_with_md5 = metadata_with_md5.copy(update={"digest": digest_sha256})
360
362
 
361
363
  with open(file_path, "rb") as file:
@@ -433,8 +435,9 @@ class _RestClient:
433
435
  digest = md5_hash.hexdigest()
434
436
  metadata_with_md5 = metadata.copy(update={"checksumMD5": digest})
435
437
 
436
- import pandas as pd
437
- digest_sha256 = hashlib.sha256(pd.util.hash_pandas_object(pd.read_parquet(file_path)).values).hexdigest()
438
+ digest_sha256 = hashlib.sha256(
439
+ pd.util.hash_pandas_object(pd.read_parquet(file_path)).values
440
+ ).hexdigest()
438
441
  metadata_with_md5 = metadata_with_md5.copy(update={"digest": digest_sha256})
439
442
 
440
443
  with open(file_path, "rb") as file:
@@ -787,17 +790,18 @@ class BackendLogHandler(logging.Handler):
787
790
  def __init__(self, rest_client: _RestClient, *args, **kwargs) -> None:
788
791
  super().__init__(*args, **kwargs)
789
792
  self.rest_client = rest_client
790
- print("Before track metrics")
791
- self.track_metrics = get_track_metrics()
792
- print("After track metrics")
793
- if "ip" in self.track_metrics.keys():
794
- self.hostname = self.track_metrics["ip"]
795
- else:
796
- self.hostname = "0.0.0.0"
793
+ self.track_metrics = None
794
+ self.hostname = None
797
795
 
798
796
  def emit(self, record: logging.LogRecord) -> None:
799
797
  def task():
800
798
  try:
799
+ if self.track_metrics is None:
800
+ self.track_metrics = get_track_metrics()
801
+ if "ip" in self.track_metrics.keys():
802
+ self.hostname = self.track_metrics["ip"]
803
+ else:
804
+ self.hostname = "0.0.0.0"
801
805
  text = self.format(record)
802
806
  tags = self.track_metrics
803
807
  tags["version"] = __version__
@@ -848,11 +852,8 @@ class LoggerFactory:
848
852
 
849
853
  upgini_logger = logging.getLogger(f"upgini.{hash(key)}")
850
854
  upgini_logger.handlers.clear()
851
- print("Before rest client")
852
855
  rest_client = get_rest_client(backend_url, api_token)
853
- print("Before backend log handler")
854
856
  datadog_handler = BackendLogHandler(rest_client)
855
- print("After backend log handler")
856
857
  json_formatter = jsonlogger.JsonFormatter(
857
858
  "%(asctime)s %(threadName)s %(name)s %(levelname)s %(message)s",
858
859
  timestamp=True,
@@ -25,7 +25,8 @@ metrics_no_important_free_features=WARNING: No important free features to calcul
25
25
  metrics_no_important_features=WARNING: No important features to calculate metrics
26
26
  metrics_negative_uplift_without_cv=Please re-check that your task is not a time series prediction. If so, restart search with cv=CVType.time_series param for correct search results. See docs https://github.com/upgini/upgini#-time-series-prediction-support
27
27
  metrics_with_trial_features=The calculation of final accuracy metrics using Trial data is not available for unauthorized users.\nGet a free API key on https://upgini.com and repeat your request.
28
- transform_with_trial_features=Enriching with Trial data is not available for unauthorized users.\nGet a free API key on https://upgini.com and repeat your request.
28
+ transform_with_trial_features=WARNING: Your search results contain Trial data sources. To enrich your dataframe using transform or fit_transform with features from these Trial data sources, please register for a Free API key at https://upgini.com and resubmit your request.
29
+ # Enriching with Trial data is not available for unauthorized users.\nGet a free API key on https://upgini.com and repeat your request.
29
30
  metrics_with_paid_features=The calculation of final accuracy metrics using Paid data is not available.\nContact Upgini support for the data access
30
31
  transform_with_paid_features=Enriching with Paid data is not available.\nContact Upgini support for the data access
31
32
  trial_quota_limit_riched=WARNING: You have reached the quota limit of trial data usage. Please contact Upgini support to remove restriction
upgini/search_task.py CHANGED
@@ -59,8 +59,8 @@ class SearchTask:
59
59
  submitted_statuses = {"SUBMITTED", "VALIDATION_SUBMITTED"}
60
60
  if not quiet:
61
61
  print(bundle.get("polling_search_task").format(self.search_task_id))
62
- if is_demo_api_key(self.api_key):
63
- print(bundle.get("polling_unregister_information"))
62
+ if is_demo_api_key(self.api_key):
63
+ print(bundle.get("polling_unregister_information"))
64
64
  search_task_id = self.initial_search_task_id if self.initial_search_task_id is not None else self.search_task_id
65
65
 
66
66
  try:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: upgini
3
- Version: 1.1.131a4
3
+ Version: 1.1.132a1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Home-page: https://upgini.com/
6
6
  Author: Upgini Developers
@@ -33,7 +33,7 @@ Requires-Dist: pandas (<2.0.0,>=1.1.0)
33
33
  Requires-Dist: numpy (>=1.19.0)
34
34
  Requires-Dist: scikit-learn (>=1.0.1)
35
35
  Requires-Dist: pydantic (>=1.8.2)
36
- Requires-Dist: fastparquet (>=0.7.1)
36
+ Requires-Dist: fastparquet (>=0.8.1)
37
37
  Requires-Dist: python-json-logger (>=2.0.2)
38
38
  Requires-Dist: catboost (>=1.0.3)
39
39
  Requires-Dist: lightgbm (>=3.3.2)
@@ -2,11 +2,11 @@ upgini/__init__.py,sha256=asENHgEVHQBIkV-e_0IhE_ZWqkCG6398U3ZLrNzAH6k,407
2
2
  upgini/ads.py,sha256=mre6xn44wcC_fg63iLT_kTh4mViZqR9AKRJZAtpQz8Y,2592
3
3
  upgini/dataset.py,sha256=uiba8qC2RNvqka5MQp-XgdYDE7-nqO5FojMO5a0n_HA,42978
4
4
  upgini/errors.py,sha256=BqpvfhW2jJW5fa5KXj0alhXatGl-WK4xTl309-QNLp8,959
5
- upgini/features_enricher.py,sha256=4nbec4wZEyKmUUVPbzCSBK683WlGdhUxkh3C8SlQT5A,117963
6
- upgini/http.py,sha256=yx5vFiY1TSYhKU4ZoBG9pDZAdYCGJYOuvglvuYpejwg,36191
5
+ upgini/features_enricher.py,sha256=iV9OHy8QFQdB5uUX6S2RuI28WBZFNkR5LZwRrhuB8Oo,118746
6
+ upgini/http.py,sha256=rY9kRwqErjC8qewANiBhkPObek-Hrm9037KEyQ3vpfs,36191
7
7
  upgini/metadata.py,sha256=Oefg-rkA4PsZUHIho_clZcnyZwdtVJ1gXPvEY6oBmpg,5969
8
8
  upgini/metrics.py,sha256=5tQ_6ZKUM0EBLnHmKZD63KHCQXTYGeE-uFLs0wHcYf4,15477
9
- upgini/search_task.py,sha256=4SbJhEqCUrntf-JrEsM4R_y3vebNEsJ5JXBeS7Y1K38,13589
9
+ upgini/search_task.py,sha256=cM-3cAcdnKHrCl7ZTqhHLrSkDVpdgowV5YBaFq2BzjY,13597
10
10
  upgini/spinner.py,sha256=yhakBaydMNS8E8TRAwTdCMdnWrHeWT0cR1M8c9hP6jA,1157
11
11
  upgini/version_validator.py,sha256=rDIncP6BEko4J2F2hUcMOtKm_vZbI4ICWcNcw8hrwM4,1400
12
12
  upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9JvfkvgE,50
@@ -19,7 +19,7 @@ upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
19
19
  upgini/normalizer/phone_normalizer.py,sha256=VIgLXuDuzzjPEXiy_LyDVLZKGaS7-le6Fh6T4D-TQDU,9930
20
20
  upgini/resource_bundle/__init__.py,sha256=M7GtS7KPQw9pinz8P2aQWXpSkD2YFwUPVGk1w92Pn84,7888
21
21
  upgini/resource_bundle/exceptions.py,sha256=KT-OnqA2J4OTfLjhbEl3KFZM2ci7EOPjqJuY_rXp3vs,622
22
- upgini/resource_bundle/strings.properties,sha256=pOQuq5RFEYqsB1edeMnQCwlogRH7E0fMIQg8vH6bhqo,20891
22
+ upgini/resource_bundle/strings.properties,sha256=X0-yWoK3uAMZdlXk3IhGOgqqa5XC4YnElK-olp0sbRM,21137
23
23
  upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
24
  upgini/sampler/base.py,sha256=X2PVsfZ3Rl7twpFDh5UWyxqY2K_jcMGxZ2NcHLwFRj4,6489
25
25
  upgini/sampler/random_under_sampler.py,sha256=whX_f_TtalHH8Seyn_7n3sX_TSiDHeYfALmme9saqDg,4082
@@ -39,8 +39,8 @@ upgini/utils/postal_code_utils.py,sha256=_8CR9tBqsPptQsmMUvnrCAmBaMIQSWH3JfJ4ly3
39
39
  upgini/utils/target_utils.py,sha256=3eHrDy_Dc9ozuOwHGnGA705m9glCxKmjB-DfLrflqiA,1370
40
40
  upgini/utils/track_info.py,sha256=O_oL4gy1jH0DVgtiUeZAW0YKCeRT4B_bzH_SZYkFaOE,4076
41
41
  upgini/utils/warning_counter.py,sha256=vnmdFo5-7GBkU2bK9h_uC0K0Y_wtfcYstxOdeRfacO0,228
42
- upgini-1.1.131a4.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
43
- upgini-1.1.131a4.dist-info/METADATA,sha256=7Kl2Gam67rQ9-3YTItd4M4URPea6xDW75zJ1MFmJlss,43229
44
- upgini-1.1.131a4.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
45
- upgini-1.1.131a4.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
46
- upgini-1.1.131a4.dist-info/RECORD,,
42
+ upgini-1.1.132a1.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
43
+ upgini-1.1.132a1.dist-info/METADATA,sha256=wEsepCklPcYgAQM8PGSh9OB2Fu6I8qmKNYqQKl867Do,43229
44
+ upgini-1.1.132a1.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
45
+ upgini-1.1.132a1.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
46
+ upgini-1.1.132a1.dist-info/RECORD,,