upgini 1.1.278a1__tar.gz → 1.1.279__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (89)
  1. upgini-1.1.279/.gitignore +156 -0
  2. {upgini-1.1.278a1/src/upgini.egg-info → upgini-1.1.279}/PKG-INFO +18 -20
  3. upgini-1.1.279/pyproject.toml +102 -0
  4. upgini-1.1.279/src/upgini/__about__.py +1 -0
  5. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/ads_management/ads_manager.py +4 -2
  6. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/autofe/all_operands.py +3 -2
  7. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/autofe/binary.py +2 -1
  8. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/autofe/date.py +2 -1
  9. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/autofe/feature.py +1 -1
  10. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/autofe/groupby.py +3 -1
  11. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/autofe/operand.py +4 -3
  12. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/autofe/unary.py +2 -1
  13. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/autofe/vector.py +2 -0
  14. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/dataset.py +6 -15
  15. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/errors.py +1 -1
  16. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/features_enricher.py +102 -214
  17. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/http.py +11 -10
  18. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/mdc/__init__.py +1 -3
  19. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/mdc/context.py +4 -6
  20. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/metadata.py +5 -10
  21. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/metrics.py +102 -100
  22. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/normalizer/phone_normalizer.py +1 -1
  23. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/resource_bundle/__init__.py +5 -5
  24. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/resource_bundle/strings.properties +0 -1
  25. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/sampler/base.py +1 -4
  26. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/sampler/random_under_sampler.py +2 -5
  27. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/search_task.py +4 -4
  28. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/spinner.py +1 -1
  29. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/utils/__init__.py +1 -1
  30. upgini-1.1.279/src/upgini/utils/base_search_key_detector.py +25 -0
  31. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/utils/blocked_time_series.py +4 -2
  32. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/utils/country_utils.py +1 -1
  33. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/utils/custom_loss_utils.py +3 -2
  34. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/utils/cv_utils.py +2 -2
  35. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/utils/datetime_utils.py +20 -15
  36. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/utils/deduplicate_utils.py +1 -11
  37. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/utils/email_utils.py +2 -7
  38. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/utils/fallback_progress_bar.py +1 -1
  39. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/utils/progress_bar.py +1 -1
  40. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/utils/sklearn_ext.py +14 -13
  41. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/utils/track_info.py +2 -2
  42. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/version_validator.py +2 -2
  43. upgini-1.1.278a1/PKG-INFO +0 -844
  44. upgini-1.1.278a1/pyproject.toml +0 -10
  45. upgini-1.1.278a1/setup.cfg +0 -4
  46. upgini-1.1.278a1/setup.py +0 -104
  47. upgini-1.1.278a1/src/upgini/fingerprint.js +0 -8
  48. upgini-1.1.278a1/src/upgini/utils/base_search_key_detector.py +0 -27
  49. upgini-1.1.278a1/src/upgini.egg-info/SOURCES.txt +0 -83
  50. upgini-1.1.278a1/src/upgini.egg-info/dependency_links.txt +0 -1
  51. upgini-1.1.278a1/src/upgini.egg-info/requires.txt +0 -13
  52. upgini-1.1.278a1/src/upgini.egg-info/top_level.txt +0 -1
  53. upgini-1.1.278a1/tests/test_autofe_operands.py +0 -94
  54. upgini-1.1.278a1/tests/test_binary_dataset.py +0 -47
  55. upgini-1.1.278a1/tests/test_blocked_time_series.py +0 -80
  56. upgini-1.1.278a1/tests/test_categorical_dataset.py +0 -44
  57. upgini-1.1.278a1/tests/test_continuous_dataset.py +0 -47
  58. upgini-1.1.278a1/tests/test_country_utils.py +0 -51
  59. upgini-1.1.278a1/tests/test_custom_loss_utils.py +0 -50
  60. upgini-1.1.278a1/tests/test_datetime_utils.py +0 -213
  61. upgini-1.1.278a1/tests/test_email_utils.py +0 -97
  62. upgini-1.1.278a1/tests/test_etalon_validation.py +0 -786
  63. upgini-1.1.278a1/tests/test_features_enricher.py +0 -2670
  64. upgini-1.1.278a1/tests/test_metrics.py +0 -1365
  65. upgini-1.1.278a1/tests/test_phone_utils.py +0 -31
  66. upgini-1.1.278a1/tests/test_postal_code_utils.py +0 -31
  67. upgini-1.1.278a1/tests/test_target_utils.py +0 -194
  68. upgini-1.1.278a1/tests/test_widget.py +0 -432
  69. {upgini-1.1.278a1 → upgini-1.1.279}/LICENSE +0 -0
  70. {upgini-1.1.278a1 → upgini-1.1.279}/README.md +0 -0
  71. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/__init__.py +0 -0
  72. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/ads.py +0 -0
  73. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/ads_management/__init__.py +0 -0
  74. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/autofe/__init__.py +0 -0
  75. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/data_source/__init__.py +0 -0
  76. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/data_source/data_source_publisher.py +0 -0
  77. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/normalizer/__init__.py +0 -0
  78. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/resource_bundle/exceptions.py +0 -0
  79. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  80. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/sampler/__init__.py +0 -0
  81. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/sampler/utils.py +0 -0
  82. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/utils/display_utils.py +0 -0
  83. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/utils/features_validator.py +0 -0
  84. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/utils/format.py +0 -0
  85. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/utils/ip_utils.py +0 -0
  86. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/utils/phone_utils.py +0 -0
  87. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/utils/postal_code_utils.py +0 -0
  88. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/utils/target_utils.py +0 -0
  89. {upgini-1.1.278a1 → upgini-1.1.279}/src/upgini/utils/warning_counter.py +0 -0
@@ -0,0 +1,156 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ target/
76
+
77
+ # Jupyter Notebook
78
+ .ipynb_checkpoints
79
+
80
+ # IPython
81
+ profile_default/
82
+ ipython_config.py
83
+
84
+ # pyenv
85
+ .python-version
86
+
87
+ # pipenv
88
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
90
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
91
+ # install all needed dependencies.
92
+ #Pipfile.lock
93
+
94
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95
+ __pypackages__/
96
+
97
+ # Celery stuff
98
+ celerybeat-schedule
99
+ celerybeat.pid
100
+
101
+ # SageMath parsed files
102
+ *.sage.py
103
+
104
+ # Environments
105
+ .env
106
+ .venv
107
+ env/
108
+ env8/
109
+ env9/
110
+ env10/
111
+ .env10/
112
+ .env310/
113
+ env11/
114
+ venv/
115
+ ENV/
116
+ env.bak/
117
+ venv.bak/
118
+
119
+ # Spyder project settings
120
+ .spyderproject
121
+ .spyproject
122
+
123
+ # Rope project settings
124
+ .ropeproject
125
+
126
+ # mkdocs documentation
127
+ /site
128
+
129
+ # mypy
130
+ .mypy_cache/
131
+ .dmypy.json
132
+ dmypy.json
133
+
134
+ # Pyre type checker
135
+ .pyre/
136
+
137
+ # IDE
138
+ .vscode/
139
+ .idea/
140
+
141
+ # macOS
142
+ .DS_Store
143
+
144
+ # Other
145
+ .cache/
146
+ activate_venv.sh
147
+ test-results/
148
+ test_notebooks/
149
+ publish.sh
150
+ catboost_info/
151
+ build/
152
+ playgroung.ipynb
153
+ fingerprint.js
154
+ envVars.txt
155
+ .ruff_cache
156
+ .jupyter
@@ -1,14 +1,13 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.1.278a1
3
+ Version: 1.1.279
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
- Home-page: https://upgini.com/
6
- Author: Upgini Developers
7
- Author-email: madewithlove@upgini.com
8
- License: BSD 3-Clause License
9
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
+ Project-URL: Homepage, https://upgini.com/
10
7
  Project-URL: Source, https://github.com/upgini/upgini
11
- Keywords: data science,machine learning,data mining,automl,data search
8
+ Author-email: Upgini Developers <madewithlove@upgini.com>
9
+ License-File: LICENSE
10
+ Keywords: automl,data mining,data science,data search,machine learning
12
11
  Classifier: Development Status :: 5 - Production/Stable
13
12
  Classifier: Intended Audience :: Customer Service
14
13
  Classifier: Intended Audience :: Developers
@@ -23,22 +22,21 @@ Classifier: Programming Language :: Python :: 3.9
23
22
  Classifier: Programming Language :: Python :: 3.10
24
23
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
25
24
  Classifier: Topic :: Scientific/Engineering :: Information Analysis
26
- Requires-Python: >=3.8,<3.11
27
- Description-Content-Type: text/markdown
28
- License-File: LICENSE
29
- Requires-Dist: python-dateutil>=2.8.0
30
- Requires-Dist: requests>=2.8.0
31
- Requires-Dist: pandas<3.0.0,>=1.1.0
32
- Requires-Dist: numpy>=1.19.0
33
- Requires-Dist: scikit-learn>=1.3.0
34
- Requires-Dist: pydantic<2.0.0,>=1.8.2
35
- Requires-Dist: fastparquet>=0.8.1
36
- Requires-Dist: python-json-logger>=2.0.2
25
+ Requires-Python: <3.11,>=3.8
37
26
  Requires-Dist: catboost>=1.0.3
27
+ Requires-Dist: fastparquet>=0.8.1
28
+ Requires-Dist: ipywidgets>=8.1.0
38
29
  Requires-Dist: lightgbm>=3.3.2
30
+ Requires-Dist: numpy>=1.19.0
31
+ Requires-Dist: pandas<3.0.0,>=1.1.0
32
+ Requires-Dist: pydantic<2.0.0,>=1.8.2
39
33
  Requires-Dist: pyjwt>=2.8.0
34
+ Requires-Dist: python-dateutil>=2.8.0
35
+ Requires-Dist: python-json-logger>=2.0.2
36
+ Requires-Dist: requests>=2.8.0
37
+ Requires-Dist: scikit-learn>=1.3.0
40
38
  Requires-Dist: xhtml2pdf==0.2.11
41
- Requires-Dist: ipywidgets>=8.1.0
39
+ Description-Content-Type: text/markdown
42
40
 
43
41
 
44
42
  <!-- <h2 align="center"> <a href="https://upgini.com/">Upgini</a> : low-code feature search and enrichment library for machine learning </h2> -->
@@ -841,4 +839,4 @@ Some convenient ways to start contributing are:
841
839
  - [More perks for registered users](https://profile.upgini.com)
842
840
 
843
841
  <sup>😔 Found mistype or a bug in code snippet? Our bad! <a href="https://github.com/upgini/upgini/issues/new?assignees=&title=readme%2Fbug">
844
- Please report it here.</a></sup>
842
+ Please report it here.</a></sup>
@@ -0,0 +1,102 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "upgini"
7
+ dynamic = ["version"]
8
+ description = "Intelligent data search & enrichment for Machine Learning"
9
+ readme = "README.md"
10
+ requires-python = ">=3.8,<3.11"
11
+ authors = [
12
+ { name = "Upgini Developers", email = "madewithlove@upgini.com" },
13
+ ]
14
+ keywords = [
15
+ "automl",
16
+ "data mining",
17
+ "data science",
18
+ "data search",
19
+ "machine learning",
20
+ ]
21
+ classifiers = [
22
+ "Development Status :: 5 - Production/Stable",
23
+ "Intended Audience :: Customer Service",
24
+ "Intended Audience :: Developers",
25
+ "Intended Audience :: Financial and Insurance Industry",
26
+ "Intended Audience :: Information Technology",
27
+ "Intended Audience :: Science/Research",
28
+ "Intended Audience :: Telecommunications Industry",
29
+ "License :: OSI Approved :: BSD License",
30
+ "Operating System :: OS Independent",
31
+ "Programming Language :: Python :: 3.8",
32
+ "Programming Language :: Python :: 3.9",
33
+ "Programming Language :: Python :: 3.10",
34
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
35
+ "Topic :: Scientific/Engineering :: Information Analysis",
36
+ ]
37
+ dependencies = [
38
+ "catboost>=1.0.3",
39
+ "fastparquet>=0.8.1",
40
+ "ipywidgets>=8.1.0",
41
+ "lightgbm>=3.3.2",
42
+ "numpy>=1.19.0",
43
+ "pandas>=1.1.0,<3.0.0",
44
+ "pydantic>=1.8.2,<2.0.0",
45
+ "pyjwt>=2.8.0",
46
+ "python-dateutil>=2.8.0",
47
+ "python-json-logger>=2.0.2",
48
+ "requests>=2.8.0",
49
+ "scikit-learn>=1.3.0",
50
+ "xhtml2pdf==0.2.11",
51
+ ]
52
+
53
+ [project.urls]
54
+ "Bug Reports" = "https://github.com/upgini/upgini/issues"
55
+ Homepage = "https://upgini.com/"
56
+ Source = "https://github.com/upgini/upgini"
57
+
58
+ [tool.hatch.version]
59
+ path = "src/upgini/__about__.py"
60
+
61
+ [tool.hatch.build.targets.sdist]
62
+ include = [
63
+ "src"
64
+ ]
65
+
66
+ [tool.hatch.build.targets.wheel]
67
+ packages = [
68
+ "src/upgini"
69
+ ]
70
+
71
+ [tool.hatch.envs.default]
72
+ type = "virtual"
73
+ python = "3.10"
74
+
75
+ [tool.hatch.envs.test]
76
+ dependencies = [
77
+ "coverage[toml]",
78
+ "pytest",
79
+ "pytest-cov",
80
+ "requests-mock",
81
+ ]
82
+
83
+ [tool.hatch.envs.test.scripts]
84
+ cov = 'pytest --cov-report=term-missing --cov-config=pyproject.toml --cov=upgini --cov=tests {args}'
85
+ format = "black {args}"
86
+ lint = "ruff check {args}"
87
+
88
+ test_binary = 'pytest -s -vv tests/test_binary_dataset.py'
89
+
90
+ [[tool.hatch.envs.test.matrix]]
91
+ python = ["3.8", "3.9", "3.10"]
92
+
93
+ [tool.black]
94
+ line-length = 120
95
+
96
+ [tool.isort]
97
+ profile = "black"
98
+
99
+ [tool.pytest.ini_options]
100
+ pythonpath = [
101
+ "./src"
102
+ ]
@@ -0,0 +1 @@
1
+ __version__ = "1.1.279"
@@ -1,9 +1,11 @@
1
1
  import time
2
- from typing import Dict, Optional
3
2
  import uuid
3
+ from typing import Dict, Optional
4
+
5
+ import pandas as pd
6
+
4
7
  from upgini.http import get_rest_client
5
8
  from upgini.spinner import Spinner
6
- import pandas as pd
7
9
 
8
10
 
9
11
  class AdsManager:
@@ -1,9 +1,10 @@
1
1
  from typing import Dict
2
+
3
+ from upgini.autofe.binary import Add, Divide, Max, Min, Multiply, Sim, Subtract
2
4
  from upgini.autofe.date import DateDiff, DateDiffType2, DateListDiff, DateListDiffBounded
3
5
  from upgini.autofe.groupby import GroupByThenAgg, GroupByThenRank
4
6
  from upgini.autofe.operand import Operand
5
- from upgini.autofe.unary import Abs, Log, Residual, Sqrt, Square, Sigmoid, Floor, Freq
6
- from upgini.autofe.binary import Min, Max, Add, Subtract, Multiply, Divide, Sim
7
+ from upgini.autofe.unary import Abs, Floor, Freq, Log, Residual, Sigmoid, Sqrt, Square
7
8
  from upgini.autofe.vector import Mean, Sum
8
9
 
9
10
  ALL_OPERANDS: Dict[str, Operand] = {
@@ -1,9 +1,10 @@
1
- from upgini.autofe.operand import PandasOperand, VectorizableMixin
2
1
  import numpy as np
3
2
  import pandas as pd
4
3
  from numpy import dot
5
4
  from numpy.linalg import norm
6
5
 
6
+ from upgini.autofe.operand import PandasOperand, VectorizableMixin
7
+
7
8
 
8
9
  class Min(PandasOperand):
9
10
  name = "min"
@@ -1,8 +1,9 @@
1
1
  from typing import Any, Optional, Union
2
+
2
3
  import numpy as np
3
4
  import pandas as pd
4
- from pydantic import BaseModel
5
5
  from pandas.core.arrays.timedeltas import TimedeltaArray
6
+ from pydantic import BaseModel
6
7
 
7
8
  from upgini.autofe.operand import PandasOperand
8
9
 
@@ -215,7 +215,7 @@ class Feature:
215
215
  return Column(string)
216
216
 
217
217
  def is_trivial_char(c: str) -> bool:
218
- return not (c in "()+-*/,")
218
+ return c not in "()+-*/,"
219
219
 
220
220
  def find_prev(string: str) -> int:
221
221
  if string[-1] != ")":
@@ -1,7 +1,9 @@
1
- from upgini.autofe.operand import PandasOperand, VectorizableMixin
2
1
  from typing import Optional
2
+
3
3
  import pandas as pd
4
4
 
5
+ from upgini.autofe.operand import PandasOperand, VectorizableMixin
6
+
5
7
 
6
8
  class GroupByThenAgg(PandasOperand, VectorizableMixin):
7
9
  agg: Optional[str]
@@ -1,8 +1,9 @@
1
- from pydantic import BaseModel
2
- from typing import Dict, List, Optional, Tuple, Union
3
1
  import abc
4
- import pandas as pd
2
+ from typing import Dict, List, Optional, Tuple, Union
3
+
5
4
  import numpy as np
5
+ import pandas as pd
6
+ from pydantic import BaseModel
6
7
 
7
8
 
8
9
  class Operand(BaseModel):
@@ -1,7 +1,8 @@
1
- from upgini.autofe.operand import PandasOperand, VectorizableMixin
2
1
  import numpy as np
3
2
  import pandas as pd
4
3
 
4
+ from upgini.autofe.operand import PandasOperand, VectorizableMixin
5
+
5
6
 
6
7
  class Abs(PandasOperand, VectorizableMixin):
7
8
  name = "abs"
@@ -1,5 +1,7 @@
1
1
  from typing import List
2
+
2
3
  import pandas as pd
4
+
3
5
  from upgini.autofe.operand import PandasOperand, VectorizableMixin
4
6
 
5
7
 
@@ -15,17 +15,15 @@ from pandas.api.types import (
15
15
  is_float_dtype,
16
16
  is_integer_dtype,
17
17
  is_numeric_dtype,
18
+ is_object_dtype,
18
19
  is_period_dtype,
19
20
  is_string_dtype,
20
- is_object_dtype,
21
21
  )
22
22
 
23
23
  from upgini.errors import ValidationError
24
24
  from upgini.http import ProgressStage, SearchProgress, _RestClient
25
25
  from upgini.metadata import (
26
- ENTITY_SYSTEM_RECORD_ID,
27
26
  EVAL_SET_INDEX,
28
- SEARCH_KEY_UNNEST,
29
27
  SYSTEM_COLUMNS,
30
28
  SYSTEM_RECORD_ID,
31
29
  TARGET,
@@ -81,7 +79,6 @@ class Dataset: # (pd.DataFrame):
81
79
  path: Optional[str] = None,
82
80
  meaning_types: Optional[Dict[str, FileColumnMeaningType]] = None,
83
81
  search_keys: Optional[List[Tuple[str, ...]]] = None,
84
- unnest_search_keys: Optional[Dict[str, str]] = None,
85
82
  model_task_type: Optional[ModelTaskType] = None,
86
83
  random_state: Optional[int] = None,
87
84
  rest_client: Optional[_RestClient] = None,
@@ -98,7 +95,7 @@ class Dataset: # (pd.DataFrame):
98
95
  data = pd.read_csv(path, **kwargs)
99
96
  else:
100
97
  # try different separators: , ; \t ...
101
- with open(path, mode="r") as csvfile:
98
+ with open(path) as csvfile:
102
99
  sep = csv.Sniffer().sniff(csvfile.read(2048)).delimiter
103
100
  kwargs["sep"] = sep
104
101
  data = pd.read_csv(path, **kwargs)
@@ -116,7 +113,6 @@ class Dataset: # (pd.DataFrame):
116
113
  self.description = description
117
114
  self.meaning_types = meaning_types
118
115
  self.search_keys = search_keys
119
- self.unnest_search_keys = unnest_search_keys
120
116
  self.ignore_columns = []
121
117
  self.hierarchical_group_keys = []
122
118
  self.hierarchical_subgroup_keys = []
@@ -176,7 +172,7 @@ class Dataset: # (pd.DataFrame):
176
172
  new_columns = []
177
173
  dup_counter = 0
178
174
  for column in self.data.columns:
179
- if column in [TARGET, EVAL_SET_INDEX, SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID, SEARCH_KEY_UNNEST]:
175
+ if column in [TARGET, EVAL_SET_INDEX, SYSTEM_RECORD_ID]:
180
176
  self.columns_renaming[column] = column
181
177
  new_columns.append(column)
182
178
  continue
@@ -255,7 +251,7 @@ class Dataset: # (pd.DataFrame):
255
251
  @staticmethod
256
252
  def _ip_to_int(ip: Optional[_BaseAddress]) -> Optional[int]:
257
253
  try:
258
- if isinstance(ip, IPv4Address) or isinstance(ip, IPv6Address):
254
+ if isinstance(ip, (IPv4Address, IPv6Address)):
259
255
  return int(ip)
260
256
  except Exception:
261
257
  pass
@@ -263,7 +259,7 @@ class Dataset: # (pd.DataFrame):
263
259
  @staticmethod
264
260
  def _ip_to_int_str(ip: Optional[_BaseAddress]) -> Optional[str]:
265
261
  try:
266
- if isinstance(ip, IPv4Address) or isinstance(ip, IPv6Address):
262
+ if isinstance(ip, (IPv4Address, IPv6Address)):
267
263
  return str(int(ip))
268
264
  except Exception:
269
265
  pass
@@ -357,9 +353,7 @@ class Dataset: # (pd.DataFrame):
357
353
 
358
354
  if is_string_dtype(self.data[postal_code]) or is_object_dtype(self.data[postal_code]):
359
355
  try:
360
- self.data[postal_code] = (
361
- self.data[postal_code].astype("string").astype("Float64").astype("Int64").astype("string")
362
- )
356
+ self.data[postal_code] = self.data[postal_code].astype("float64").astype("Int64").astype("string")
363
357
  except Exception:
364
358
  pass
365
359
  elif is_float_dtype(self.data[postal_code]):
@@ -809,9 +803,6 @@ class Dataset: # (pd.DataFrame):
809
803
  meaningType=meaning_type,
810
804
  minMaxValues=min_max_values,
811
805
  )
812
- if self.unnest_search_keys and column_meta.originalName in self.unnest_search_keys:
813
- column_meta.isUnnest = True
814
- column_meta.unnestKeyNames = self.unnest_search_keys[column_meta.originalName]
815
806
 
816
807
  columns.append(column_meta)
817
808
 
@@ -16,7 +16,7 @@ class UnauthorizedError(HttpError):
16
16
  """Unauthorized error from REST API."""
17
17
 
18
18
  def __init__(self, message, status_code):
19
- message = "Unauthorized, please check your authorization token ({})".format(message)
19
+ message = f"Unauthorized, please check your authorization token ({message})"
20
20
  super(UnauthorizedError, self).__init__(message, status_code)
21
21
 
22
22