upgini 1.1.274a4__py3-none-any.whl → 1.1.280a3418.post2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (48)
  1. upgini/__about__.py +1 -0
  2. upgini/ads.py +6 -2
  3. upgini/ads_management/ads_manager.py +4 -2
  4. upgini/autofe/all_operands.py +4 -2
  5. upgini/autofe/binary.py +2 -1
  6. upgini/autofe/date.py +9 -2
  7. upgini/autofe/feature.py +1 -1
  8. upgini/autofe/groupby.py +3 -1
  9. upgini/autofe/operand.py +4 -3
  10. upgini/autofe/unary.py +20 -1
  11. upgini/autofe/vector.py +2 -0
  12. upgini/dataset.py +7 -6
  13. upgini/errors.py +1 -1
  14. upgini/features_enricher.py +44 -25
  15. upgini/http.py +11 -10
  16. upgini/mdc/__init__.py +1 -3
  17. upgini/mdc/context.py +4 -6
  18. upgini/metadata.py +3 -0
  19. upgini/metrics.py +110 -97
  20. upgini/normalizer/phone_normalizer.py +1 -1
  21. upgini/resource_bundle/__init__.py +5 -5
  22. upgini/resource_bundle/strings.properties +1 -1
  23. upgini/sampler/base.py +1 -4
  24. upgini/sampler/random_under_sampler.py +2 -5
  25. upgini/search_task.py +4 -4
  26. upgini/spinner.py +1 -1
  27. upgini/utils/__init__.py +3 -2
  28. upgini/utils/base_search_key_detector.py +2 -2
  29. upgini/utils/blocked_time_series.py +4 -2
  30. upgini/utils/country_utils.py +2 -2
  31. upgini/utils/custom_loss_utils.py +3 -2
  32. upgini/utils/cv_utils.py +2 -2
  33. upgini/utils/datetime_utils.py +25 -19
  34. upgini/utils/email_utils.py +3 -3
  35. upgini/utils/fallback_progress_bar.py +1 -1
  36. upgini/utils/features_validator.py +2 -1
  37. upgini/utils/progress_bar.py +1 -1
  38. upgini/utils/sklearn_ext.py +14 -13
  39. upgini/utils/target_utils.py +1 -1
  40. upgini/utils/track_info.py +27 -15
  41. upgini/version_validator.py +2 -2
  42. {upgini-1.1.274a4.dist-info → upgini-1.1.280a3418.post2.dist-info}/METADATA +21 -23
  43. upgini-1.1.280a3418.post2.dist-info/RECORD +62 -0
  44. {upgini-1.1.274a4.dist-info → upgini-1.1.280a3418.post2.dist-info}/WHEEL +1 -2
  45. upgini/fingerprint.js +0 -8
  46. upgini-1.1.274a4.dist-info/RECORD +0 -63
  47. upgini-1.1.274a4.dist-info/top_level.txt +0 -1
  48. {upgini-1.1.274a4.dist-info → upgini-1.1.280a3418.post2.dist-info/licenses}/LICENSE +0 -0
upgini/__about__.py ADDED
@@ -0,0 +1 @@
1
+ __version__ = "1.1.280a3418-2"
upgini/ads.py CHANGED
@@ -5,7 +5,7 @@ from typing import Dict, Optional
5
5
 
6
6
  import numpy as np
7
7
  import pandas as pd
8
- from pandas.api.types import is_string_dtype
8
+ from pandas.api.types import is_object_dtype, is_string_dtype
9
9
 
10
10
  from upgini import SearchKey
11
11
  from upgini.http import get_rest_client
@@ -34,7 +34,11 @@ def upload_user_ads(name: str, df: pd.DataFrame, search_keys: Dict[str, SearchKe
34
34
  if df[column_name].notnull().sum() < min_valid_rows_count:
35
35
  raise ValueError(bundle.get("ads_upload_to_many_empty_rows"))
36
36
  meaning_type = search_keys[column_name].value
37
- if meaning_type == FileColumnMeaningType.MSISDN and not is_string_dtype(df[column_name]):
37
+ if (
38
+ meaning_type == FileColumnMeaningType.MSISDN
39
+ and not is_string_dtype(df[column_name])
40
+ and not is_object_dtype(df[column_name])
41
+ ):
38
42
  df[column_name] = df[column_name].values.astype(np.int64).astype("string") # type: ignore
39
43
  else:
40
44
  meaning_type = FileColumnMeaningType.FEATURE
upgini/ads_management/ads_manager.py CHANGED
@@ -1,9 +1,11 @@
1
1
  import time
2
- from typing import Dict, Optional
3
2
  import uuid
3
+ from typing import Dict, Optional
4
+
5
+ import pandas as pd
6
+
4
7
  from upgini.http import get_rest_client
5
8
  from upgini.spinner import Spinner
6
- import pandas as pd
7
9
 
8
10
 
9
11
  class AdsManager:
upgini/autofe/all_operands.py CHANGED
@@ -1,9 +1,10 @@
1
1
  from typing import Dict
2
+
3
+ from upgini.autofe.binary import Add, Divide, Max, Min, Multiply, Sim, Subtract
2
4
  from upgini.autofe.date import DateDiff, DateDiffType2, DateListDiff, DateListDiffBounded
3
5
  from upgini.autofe.groupby import GroupByThenAgg, GroupByThenRank
4
6
  from upgini.autofe.operand import Operand
5
- from upgini.autofe.unary import Abs, Log, Residual, Sqrt, Square, Sigmoid, Floor, Freq
6
- from upgini.autofe.binary import Min, Max, Add, Subtract, Multiply, Divide, Sim
7
+ from upgini.autofe.unary import Abs, Bin, Floor, Freq, Log, Residual, Sigmoid, Sqrt, Square
7
8
  from upgini.autofe.vector import Mean, Sum
8
9
 
9
10
  ALL_OPERANDS: Dict[str, Operand] = {
@@ -48,6 +49,7 @@ ALL_OPERANDS: Dict[str, Operand] = {
48
49
  DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=30, upper_bound=45),
49
50
  DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=45, upper_bound=60),
50
51
  DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=60),
52
+ Bin(),
51
53
  ]
52
54
  }
53
55
 
upgini/autofe/binary.py CHANGED
@@ -1,9 +1,10 @@
1
- from upgini.autofe.operand import PandasOperand, VectorizableMixin
2
1
  import numpy as np
3
2
  import pandas as pd
4
3
  from numpy import dot
5
4
  from numpy.linalg import norm
6
5
 
6
+ from upgini.autofe.operand import PandasOperand, VectorizableMixin
7
+
7
8
 
8
9
  class Min(PandasOperand):
9
10
  name = "min"
upgini/autofe/date.py CHANGED
@@ -1,6 +1,8 @@
1
1
  from typing import Any, Optional, Union
2
+
2
3
  import numpy as np
3
4
  import pandas as pd
5
+ from pandas.core.arrays.timedeltas import TimedeltaArray
4
6
  from pydantic import BaseModel
5
7
 
6
8
  from upgini.autofe.operand import PandasOperand
@@ -73,8 +75,13 @@ class DateListDiff(PandasOperand, DateDiffMixin):
73
75
 
74
76
  return pd.Series(left - right.values).apply(lambda x: self._agg(self._diff(x)))
75
77
 
76
- def _diff(self, x):
77
- x = x / np.timedelta64(1, self.diff_unit)
78
+ def _diff(self, x: TimedeltaArray):
79
+ if self.diff_unit == "Y":
80
+ x = (x / 365 / 24 / 60 / 60 / 10**9).astype(int)
81
+ elif self.diff_unit == "M":
82
+ raise Exception("Unsupported difference unit: Month")
83
+ else:
84
+ x = x / np.timedelta64(1, self.diff_unit)
78
85
  return x[x > 0]
79
86
 
80
87
  def _agg(self, x):
upgini/autofe/feature.py CHANGED
@@ -215,7 +215,7 @@ class Feature:
215
215
  return Column(string)
216
216
 
217
217
  def is_trivial_char(c: str) -> bool:
218
- return not (c in "()+-*/,")
218
+ return c not in "()+-*/,"
219
219
 
220
220
  def find_prev(string: str) -> int:
221
221
  if string[-1] != ")":
upgini/autofe/groupby.py CHANGED
@@ -1,7 +1,9 @@
1
- from upgini.autofe.operand import PandasOperand, VectorizableMixin
2
1
  from typing import Optional
2
+
3
3
  import pandas as pd
4
4
 
5
+ from upgini.autofe.operand import PandasOperand, VectorizableMixin
6
+
5
7
 
6
8
  class GroupByThenAgg(PandasOperand, VectorizableMixin):
7
9
  agg: Optional[str]
upgini/autofe/operand.py CHANGED
@@ -1,8 +1,9 @@
1
- from pydantic import BaseModel
2
- from typing import Dict, List, Optional, Tuple, Union
3
1
  import abc
4
- import pandas as pd
2
+ from typing import Dict, List, Optional, Tuple, Union
3
+
5
4
  import numpy as np
5
+ import pandas as pd
6
+ from pydantic import BaseModel
6
7
 
7
8
 
8
9
  class Operand(BaseModel):
upgini/autofe/unary.py CHANGED
@@ -1,7 +1,8 @@
1
- from upgini.autofe.operand import PandasOperand, VectorizableMixin
2
1
  import numpy as np
3
2
  import pandas as pd
4
3
 
4
+ from upgini.autofe.operand import PandasOperand, VectorizableMixin
5
+
5
6
 
6
7
  class Abs(PandasOperand, VectorizableMixin):
7
8
  name = "abs"
@@ -110,3 +111,21 @@ class Freq(PandasOperand):
110
111
  def calculate_unary(self, data: pd.Series) -> pd.Series:
111
112
  value_counts = data.value_counts(normalize=True)
112
113
  return self._loc(data, value_counts)
114
+
115
+
116
+ class Bin(PandasOperand):
117
+ name = "bin"
118
+ is_unary = True
119
+ output_type = "int"
120
+ input_type = "discrete"
121
+
122
+ zero_bound_low: int
123
+ zero_bound_high: int
124
+ step: int
125
+
126
+ def calculate_unary(self, data: pd.Series) -> pd.Series:
127
+ res = pd.Series(np.zeros(data.shape), index=data.index, dtype="int")
128
+ res.update((data[data < self.zero_bound_low] - self.zero_bound_low) // self.step)
129
+ res.update((data[data >= self.zero_bound_high] - self.zero_bound_high) // self.step + 1)
130
+
131
+ return res
upgini/autofe/vector.py CHANGED
@@ -1,5 +1,7 @@
1
1
  from typing import List
2
+
2
3
  import pandas as pd
4
+
3
5
  from upgini.autofe.operand import PandasOperand, VectorizableMixin
4
6
 
5
7
 
upgini/dataset.py CHANGED
@@ -15,6 +15,7 @@ from pandas.api.types import (
15
15
  is_float_dtype,
16
16
  is_integer_dtype,
17
17
  is_numeric_dtype,
18
+ is_object_dtype,
18
19
  is_period_dtype,
19
20
  is_string_dtype,
20
21
  )
@@ -94,7 +95,7 @@ class Dataset: # (pd.DataFrame):
94
95
  data = pd.read_csv(path, **kwargs)
95
96
  else:
96
97
  # try different separators: , ; \t ...
97
- with open(path, mode="r") as csvfile:
98
+ with open(path) as csvfile:
98
99
  sep = csv.Sniffer().sniff(csvfile.read(2048)).delimiter
99
100
  kwargs["sep"] = sep
100
101
  data = pd.read_csv(path, **kwargs)
@@ -219,7 +220,7 @@ class Dataset: # (pd.DataFrame):
219
220
  """Check that string values less than maximum characters for LLM"""
220
221
  # self.logger.info("Validate too long string values")
221
222
  for col in self.data.columns:
222
- if is_string_dtype(self.data[col]):
223
+ if is_string_dtype(self.data[col]) or is_object_dtype(self.data[col]):
223
224
  max_length: int = self.data[col].astype("str").str.len().max()
224
225
  if max_length > self.MAX_STRING_FEATURE_LENGTH:
225
226
  self.data[col] = self.data[col].astype("str").str.slice(stop=self.MAX_STRING_FEATURE_LENGTH)
@@ -250,7 +251,7 @@ class Dataset: # (pd.DataFrame):
250
251
  @staticmethod
251
252
  def _ip_to_int(ip: Optional[_BaseAddress]) -> Optional[int]:
252
253
  try:
253
- if isinstance(ip, IPv4Address) or isinstance(ip, IPv6Address):
254
+ if isinstance(ip, (IPv4Address, IPv6Address)):
254
255
  return int(ip)
255
256
  except Exception:
256
257
  pass
@@ -258,7 +259,7 @@ class Dataset: # (pd.DataFrame):
258
259
  @staticmethod
259
260
  def _ip_to_int_str(ip: Optional[_BaseAddress]) -> Optional[str]:
260
261
  try:
261
- if isinstance(ip, IPv4Address) or isinstance(ip, IPv6Address):
262
+ if isinstance(ip, (IPv4Address, IPv6Address)):
262
263
  return str(int(ip))
263
264
  except Exception:
264
265
  pass
@@ -350,7 +351,7 @@ class Dataset: # (pd.DataFrame):
350
351
  if postal_code is not None and postal_code in self.data.columns:
351
352
  # self.logger.info("Normalize postal code")
352
353
 
353
- if is_string_dtype(self.data[postal_code]):
354
+ if is_string_dtype(self.data[postal_code]) or is_object_dtype(self.data[postal_code]):
354
355
  try:
355
356
  self.data[postal_code] = self.data[postal_code].astype("float64").astype("Int64").astype("string")
356
357
  except Exception:
@@ -821,7 +822,7 @@ class Dataset: # (pd.DataFrame):
821
822
  return DataType.INT
822
823
  elif is_float_dtype(pandas_data_type):
823
824
  return DataType.DECIMAL
824
- elif is_string_dtype(pandas_data_type):
825
+ elif is_string_dtype(pandas_data_type) or is_object_dtype(pandas_data_type):
825
826
  return DataType.STRING
826
827
  else:
827
828
  msg = self.bundle.get("dataset_invalid_column_type").format(column_name, pandas_data_type)
upgini/errors.py CHANGED
@@ -16,7 +16,7 @@ class UnauthorizedError(HttpError):
16
16
  """Unauthorized error from REST API."""
17
17
 
18
18
  def __init__(self, message, status_code):
19
- message = "Unauthorized, please check your authorization token ({})".format(message)
19
+ message = f"Unauthorized, please check your authorization token ({message})"
20
20
  super(UnauthorizedError, self).__init__(message, status_code)
21
21
 
22
22
 
upgini/features_enricher.py CHANGED
@@ -21,6 +21,7 @@ from pandas.api.types import (
21
21
  is_bool,
22
22
  is_datetime64_any_dtype,
23
23
  is_numeric_dtype,
24
+ is_object_dtype,
24
25
  is_period_dtype,
25
26
  is_string_dtype,
26
27
  )
@@ -424,6 +425,9 @@ class FeaturesEnricher(TransformerMixin):
424
425
 
425
426
  self.__validate_search_keys(self.search_keys, self.search_id)
426
427
 
428
+ # Validate client estimator params
429
+ self._get_client_cat_features(estimator, X, self.search_keys)
430
+
427
431
  try:
428
432
  self.X = X
429
433
  self.y = y
@@ -817,6 +821,7 @@ class FeaturesEnricher(TransformerMixin):
817
821
  trace_id = trace_id or str(uuid.uuid4())
818
822
  start_time = time.time()
819
823
  with MDC(trace_id=trace_id):
824
+ self.logger.info("Start calculate metrics")
820
825
  if len(args) > 0:
821
826
  msg = f"WARNING: Unsupported positional arguments for calculate_metrics: {args}"
822
827
  self.logger.warning(msg)
@@ -868,22 +873,9 @@ class FeaturesEnricher(TransformerMixin):
868
873
  self.__display_support_link(msg)
869
874
  return None
870
875
 
871
- cat_features = None
872
- search_keys_for_metrics = []
873
- if (
874
- estimator is not None
875
- and hasattr(estimator, "get_param")
876
- and estimator.get_param("cat_features") is not None
877
- ):
878
- cat_features = estimator.get_param("cat_features")
879
- if len(cat_features) > 0 and isinstance(cat_features[0], int):
880
- cat_features = [effective_X.columns[i] for i in cat_features]
881
- for cat_feature in cat_features:
882
- if cat_feature in self.search_keys:
883
- if self.search_keys[cat_feature] in [SearchKey.COUNTRY, SearchKey.POSTAL_CODE]:
884
- search_keys_for_metrics.append(cat_feature)
885
- else:
886
- raise ValidationError(self.bundle.get("cat_feature_search_key").format(cat_feature))
876
+ cat_features, search_keys_for_metrics = self._get_client_cat_features(
877
+ estimator, effective_X, self.search_keys
878
+ )
887
879
 
888
880
  prepared_data = self._prepare_data_for_metrics(
889
881
  trace_id=trace_id,
@@ -898,6 +890,7 @@ class FeaturesEnricher(TransformerMixin):
898
890
  search_keys_for_metrics=search_keys_for_metrics,
899
891
  progress_bar=progress_bar,
900
892
  progress_callback=progress_callback,
893
+ cat_features=cat_features,
901
894
  )
902
895
  if prepared_data is None:
903
896
  return None
@@ -1273,6 +1266,29 @@ class FeaturesEnricher(TransformerMixin):
1273
1266
 
1274
1267
  return _cv, groups
1275
1268
 
1269
+ def _get_client_cat_features(
1270
+ self, estimator: Optional[Any], X: pd.DataFrame, search_keys: Dict[str, SearchKey]
1271
+ ) -> Optional[List[str]]:
1272
+ cat_features = None
1273
+ search_keys_for_metrics = []
1274
+ if (
1275
+ estimator is not None
1276
+ and hasattr(estimator, "get_param")
1277
+ and estimator.get_param("cat_features") is not None
1278
+ ):
1279
+ cat_features = estimator.get_param("cat_features")
1280
+ if len(cat_features) > 0:
1281
+ if all([isinstance(f, int) for f in cat_features]):
1282
+ cat_features = [X.columns[i] for i in cat_features]
1283
+ self.logger.info(f"Collected categorical features {cat_features} from user estimator")
1284
+ for cat_feature in cat_features:
1285
+ if cat_feature in search_keys:
1286
+ if search_keys[cat_feature] in [SearchKey.COUNTRY, SearchKey.POSTAL_CODE]:
1287
+ search_keys_for_metrics.append(cat_feature)
1288
+ else:
1289
+ raise ValidationError(self.bundle.get("cat_feature_search_key").format(cat_feature))
1290
+ return cat_features, search_keys_for_metrics
1291
+
1276
1292
  def _prepare_data_for_metrics(
1277
1293
  self,
1278
1294
  trace_id: str,
@@ -1287,6 +1303,7 @@ class FeaturesEnricher(TransformerMixin):
1287
1303
  search_keys_for_metrics: Optional[List[str]] = None,
1288
1304
  progress_bar: Optional[ProgressBar] = None,
1289
1305
  progress_callback: Optional[Callable[[SearchProgress], Any]] = None,
1306
+ cat_features: Optional[List[str]] = None,
1290
1307
  ):
1291
1308
  is_input_same_as_fit, X, y, eval_set = self._is_input_same_as_fit(X, y, eval_set)
1292
1309
  is_demo_dataset = hash_input(X, y, eval_set) in DEMO_DATASET_HASHES
@@ -1344,9 +1361,8 @@ class FeaturesEnricher(TransformerMixin):
1344
1361
 
1345
1362
  # Detect and drop high cardinality columns in train
1346
1363
  columns_with_high_cardinality = FeaturesValidator.find_high_cardinality(fitting_X)
1347
- columns_with_high_cardinality = [
1348
- c for c in columns_with_high_cardinality if c not in (self.generate_features or [])
1349
- ]
1364
+ non_excluding_columns = (self.generate_features or []) + (cat_features or [])
1365
+ columns_with_high_cardinality = [c for c in columns_with_high_cardinality if c not in non_excluding_columns]
1350
1366
  if len(columns_with_high_cardinality) > 0:
1351
1367
  self.logger.warning(
1352
1368
  f"High cardinality columns {columns_with_high_cardinality} will be dropped for metrics calculation"
@@ -2532,7 +2548,7 @@ class FeaturesEnricher(TransformerMixin):
2532
2548
  validated_X = X.copy()
2533
2549
  elif isinstance(X, pd.Series):
2534
2550
  validated_X = X.to_frame()
2535
- elif isinstance(X, np.ndarray) or isinstance(X, list):
2551
+ elif isinstance(X, (list, np.ndarray)):
2536
2552
  validated_X = pd.DataFrame(X)
2537
2553
  renaming = {c: str(c) for c in validated_X.columns}
2538
2554
  validated_X = validated_X.rename(columns=renaming)
@@ -2621,7 +2637,7 @@ class FeaturesEnricher(TransformerMixin):
2621
2637
  validated_eval_X = eval_X.copy()
2622
2638
  elif isinstance(eval_X, pd.Series):
2623
2639
  validated_eval_X = eval_X.to_frame()
2624
- elif isinstance(eval_X, np.ndarray) or isinstance(eval_X, list):
2640
+ elif isinstance(eval_X, (list, np.ndarray)):
2625
2641
  validated_eval_X = pd.DataFrame(eval_X)
2626
2642
  renaming = {c: str(c) for c in validated_eval_X.columns}
2627
2643
  validated_eval_X = validated_eval_X.rename(columns=renaming)
@@ -2803,7 +2819,7 @@ class FeaturesEnricher(TransformerMixin):
2803
2819
  )
2804
2820
 
2805
2821
  def sample(df):
2806
- if isinstance(df, pd.Series) or isinstance(df, pd.DataFrame):
2822
+ if isinstance(df, (pd.DataFrame, pd.Series)):
2807
2823
  return df.head(10)
2808
2824
  else:
2809
2825
  return df[:10]
@@ -2967,7 +2983,7 @@ class FeaturesEnricher(TransformerMixin):
2967
2983
 
2968
2984
  def __correct_target(self, df: pd.DataFrame) -> pd.DataFrame:
2969
2985
  target = df[self.TARGET_NAME]
2970
- if is_string_dtype(target):
2986
+ if is_string_dtype(target) or is_object_dtype(target):
2971
2987
  maybe_numeric_target = pd.to_numeric(target, errors="coerce")
2972
2988
  # If less than 5% is non numeric then leave this rows with NaN target and later it will be dropped
2973
2989
  if maybe_numeric_target.isna().sum() <= _num_samples(df) * 0.05:
@@ -3240,6 +3256,8 @@ class FeaturesEnricher(TransformerMixin):
3240
3256
  descriptions = []
3241
3257
  for m in autofe_meta:
3242
3258
  autofe_feature = Feature.from_formula(m.formula)
3259
+ orig_to_hashed = {base_column.original_name: base_column.hashed_name for base_column in m.base_columns}
3260
+ autofe_feature.rename_columns(orig_to_hashed)
3243
3261
  autofe_feature.set_display_index(m.display_index)
3244
3262
  if autofe_feature.op.is_vector:
3245
3263
  continue
@@ -3367,7 +3385,8 @@ class FeaturesEnricher(TransformerMixin):
3367
3385
  valid_search_keys[column_name] = SearchKey.CUSTOM_KEY
3368
3386
  else:
3369
3387
  if x[column_name].isnull().all() or (
3370
- is_string_dtype(x[column_name]) and (x[column_name].astype("string").str.strip() == "").all()
3388
+ (is_string_dtype(x[column_name]) or is_object_dtype(x[column_name]))
3389
+ and (x[column_name].astype("string").str.strip() == "").all()
3371
3390
  ):
3372
3391
  raise ValidationError(self.bundle.get("empty_search_key").format(column_name))
3373
3392
 
@@ -3674,7 +3693,7 @@ class FeaturesEnricher(TransformerMixin):
3674
3693
  def sample(inp, sample_index):
3675
3694
  if _num_samples(inp) <= 1000:
3676
3695
  return inp
3677
- if isinstance(inp, pd.DataFrame) or isinstance(inp, pd.Series):
3696
+ if isinstance(inp, (pd.DataFrame, pd.Series)):
3678
3697
  return inp.sample(n=1000, random_state=random_state)
3679
3698
  if isinstance(inp, np.ndarray):
3680
3699
  return inp[sample_index]
upgini/http.py CHANGED
@@ -22,6 +22,7 @@ from pydantic import BaseModel
22
22
  from pythonjsonlogger import jsonlogger
23
23
  from requests.exceptions import RequestException
24
24
 
25
+ from upgini.__about__ import __version__
25
26
  from upgini.errors import (
26
27
  HttpError,
27
28
  UnauthorizedError,
@@ -38,17 +39,17 @@ from upgini.metadata import (
38
39
  from upgini.resource_bundle import bundle
39
40
  from upgini.utils.track_info import get_track_metrics
40
41
 
41
- try:
42
- from importlib_metadata import version # type: ignore
42
+ # try:
43
+ # from importlib.metadata import version # type: ignore
43
44
 
44
- __version__ = version("upgini")
45
- except ImportError:
46
- try:
47
- from importlib.metadata import version # type: ignore
45
+ # __version__ = version("upgini")
46
+ # except ImportError:
47
+ # try:
48
+ # from importlib_metadata import version # type: ignore
48
49
 
49
- __version__ = version("upgini")
50
- except ImportError:
51
- __version__ = "Upgini wasn't installed"
50
+ # __version__ = version("upgini")
51
+ # except ImportError:
52
+ # __version__ = "Upgini wasn't installed"
52
53
 
53
54
  UPGINI_URL: str = "UPGINI_URL"
54
55
  UPGINI_API_KEY: str = "UPGINI_API_KEY"
@@ -925,7 +926,7 @@ def is_demo_api_key(api_token: Optional[str]) -> bool:
925
926
  return api_token is None or api_token == "" or api_token == DEMO_API_KEY
926
927
 
927
928
 
928
- @lru_cache()
929
+ @lru_cache
929
930
  def _get_rest_client(
930
931
  backend_url: str, api_token: str, client_ip: Optional[str] = None, client_visitorid: Optional[str] = None
931
932
  ) -> _RestClient:
upgini/mdc/__init__.py CHANGED
@@ -1,15 +1,13 @@
1
- # -*- coding: utf-8 -*-
2
1
  """
3
2
  .. module: mdc
4
3
  .. moduleauthor:: Aljosha Friemann a.friemann@automate.wtf
5
4
  """
6
- from __future__ import absolute_import, division, print_function, unicode_literals
7
5
 
8
6
  import logging
9
7
 
10
- from upgini.mdc.context import new_log_context, get_mdc_fields
11
8
  from pythonjsonlogger import jsonlogger
12
9
 
10
+ from upgini.mdc.context import get_mdc_fields, new_log_context
13
11
 
14
12
  MDContext = new_log_context
15
13
  MDC = new_log_context
upgini/mdc/context.py CHANGED
@@ -1,4 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
1
  """
3
2
  .. module: TODO
4
3
  :platform: TODO
@@ -7,12 +6,11 @@
7
6
  .. moduleauthor:: Aljosha Friemann a.friemann@automate.wtf
8
7
  """
9
8
 
10
- import time
11
- import uuid
9
+ import collections
12
10
  import logging
13
11
  import threading
14
- import collections
15
-
12
+ import time
13
+ import uuid
16
14
  from contextlib import contextmanager
17
15
 
18
16
  LOGGER = logging.getLogger(__name__)
@@ -32,7 +30,7 @@ def get_mdc_fields():
32
30
 
33
31
  @contextmanager
34
32
  def new_log_context(**kwargs):
35
- context_id = "mdc-{thread}-{context}".format(thread=threading.current_thread().ident, context=uuid.uuid4())
33
+ context_id = f"mdc-{threading.current_thread().ident}-{uuid.uuid4()}"
36
34
 
37
35
  LOGGER.debug("creating context %s", context_id)
38
36
 
upgini/metadata.py CHANGED
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  from enum import Enum
2
4
  from typing import Dict, List, Optional, Set
3
5
 
@@ -201,6 +203,7 @@ class FileMetadata(BaseModel):
201
203
  for c in self.columns:
202
204
  if c.name == name:
203
205
  return c
206
+ return None
204
207
 
205
208
  def search_types(self) -> Set[SearchKey]:
206
209
  search_keys = set()