upgini 1.1.279a2__tar.gz → 1.1.279a2.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (88) hide show
  1. upgini-1.1.279a2.dev1/.gitignore +154 -0
  2. {upgini-1.1.279a2/src/upgini.egg-info → upgini-1.1.279a2.dev1}/PKG-INFO +18 -20
  3. upgini-1.1.279a2.dev1/pyproject.toml +102 -0
  4. upgini-1.1.279a2.dev1/src/upgini/__about__.py +1 -0
  5. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/ads_management/ads_manager.py +4 -2
  6. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/autofe/all_operands.py +3 -2
  7. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/autofe/binary.py +2 -1
  8. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/autofe/date.py +2 -1
  9. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/autofe/feature.py +1 -1
  10. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/autofe/groupby.py +3 -1
  11. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/autofe/operand.py +4 -3
  12. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/autofe/unary.py +2 -1
  13. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/autofe/vector.py +2 -0
  14. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/dataset.py +4 -4
  15. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/errors.py +1 -1
  16. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/features_enricher.py +4 -4
  17. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/http.py +11 -10
  18. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/mdc/__init__.py +1 -3
  19. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/mdc/context.py +4 -6
  20. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/metadata.py +3 -0
  21. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/metrics.py +101 -99
  22. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/normalizer/phone_normalizer.py +1 -1
  23. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/resource_bundle/__init__.py +5 -5
  24. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/sampler/base.py +1 -4
  25. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/sampler/random_under_sampler.py +2 -5
  26. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/search_task.py +4 -4
  27. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/spinner.py +1 -1
  28. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/utils/__init__.py +1 -1
  29. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/utils/base_search_key_detector.py +2 -2
  30. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/utils/blocked_time_series.py +4 -2
  31. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/utils/country_utils.py +1 -1
  32. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/utils/custom_loss_utils.py +3 -2
  33. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/utils/cv_utils.py +2 -2
  34. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/utils/datetime_utils.py +9 -3
  35. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/utils/email_utils.py +2 -2
  36. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/utils/fallback_progress_bar.py +1 -1
  37. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/utils/progress_bar.py +1 -1
  38. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/utils/sklearn_ext.py +14 -13
  39. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/utils/track_info.py +2 -2
  40. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/version_validator.py +2 -2
  41. upgini-1.1.279a2/PKG-INFO +0 -844
  42. upgini-1.1.279a2/pyproject.toml +0 -10
  43. upgini-1.1.279a2/setup.cfg +0 -4
  44. upgini-1.1.279a2/setup.py +0 -104
  45. upgini-1.1.279a2/src/upgini/fingerprint.js +0 -8
  46. upgini-1.1.279a2/src/upgini.egg-info/SOURCES.txt +0 -83
  47. upgini-1.1.279a2/src/upgini.egg-info/dependency_links.txt +0 -1
  48. upgini-1.1.279a2/src/upgini.egg-info/requires.txt +0 -13
  49. upgini-1.1.279a2/src/upgini.egg-info/top_level.txt +0 -1
  50. upgini-1.1.279a2/tests/test_autofe_operands.py +0 -94
  51. upgini-1.1.279a2/tests/test_binary_dataset.py +0 -47
  52. upgini-1.1.279a2/tests/test_blocked_time_series.py +0 -80
  53. upgini-1.1.279a2/tests/test_categorical_dataset.py +0 -44
  54. upgini-1.1.279a2/tests/test_continuous_dataset.py +0 -47
  55. upgini-1.1.279a2/tests/test_country_utils.py +0 -51
  56. upgini-1.1.279a2/tests/test_custom_loss_utils.py +0 -50
  57. upgini-1.1.279a2/tests/test_datetime_utils.py +0 -213
  58. upgini-1.1.279a2/tests/test_email_utils.py +0 -99
  59. upgini-1.1.279a2/tests/test_etalon_validation.py +0 -767
  60. upgini-1.1.279a2/tests/test_features_enricher.py +0 -2661
  61. upgini-1.1.279a2/tests/test_metrics.py +0 -1365
  62. upgini-1.1.279a2/tests/test_phone_utils.py +0 -31
  63. upgini-1.1.279a2/tests/test_postal_code_utils.py +0 -31
  64. upgini-1.1.279a2/tests/test_target_utils.py +0 -194
  65. upgini-1.1.279a2/tests/test_widget.py +0 -432
  66. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/LICENSE +0 -0
  67. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/README.md +0 -0
  68. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/__init__.py +0 -0
  69. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/ads.py +0 -0
  70. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/ads_management/__init__.py +0 -0
  71. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/autofe/__init__.py +0 -0
  72. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/data_source/__init__.py +0 -0
  73. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/data_source/data_source_publisher.py +0 -0
  74. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/normalizer/__init__.py +0 -0
  75. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/resource_bundle/exceptions.py +0 -0
  76. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/resource_bundle/strings.properties +0 -0
  77. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  78. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/sampler/__init__.py +0 -0
  79. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/sampler/utils.py +0 -0
  80. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/utils/deduplicate_utils.py +0 -0
  81. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/utils/display_utils.py +0 -0
  82. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/utils/features_validator.py +0 -0
  83. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/utils/format.py +0 -0
  84. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/utils/ip_utils.py +0 -0
  85. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/utils/phone_utils.py +0 -0
  86. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/utils/postal_code_utils.py +0 -0
  87. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/utils/target_utils.py +0 -0
  88. {upgini-1.1.279a2 → upgini-1.1.279a2.dev1}/src/upgini/utils/warning_counter.py +0 -0
@@ -0,0 +1,154 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ target/
76
+
77
+ # Jupyter Notebook
78
+ .ipynb_checkpoints
79
+
80
+ # IPython
81
+ profile_default/
82
+ ipython_config.py
83
+
84
+ # pyenv
85
+ .python-version
86
+
87
+ # pipenv
88
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
90
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
91
+ # install all needed dependencies.
92
+ #Pipfile.lock
93
+
94
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95
+ __pypackages__/
96
+
97
+ # Celery stuff
98
+ celerybeat-schedule
99
+ celerybeat.pid
100
+
101
+ # SageMath parsed files
102
+ *.sage.py
103
+
104
+ # Environments
105
+ .env
106
+ .venv
107
+ env/
108
+ env8/
109
+ env9/
110
+ env10/
111
+ .env10/
112
+ .env310/
113
+ env11/
114
+ venv/
115
+ ENV/
116
+ env.bak/
117
+ venv.bak/
118
+
119
+ # Spyder project settings
120
+ .spyderproject
121
+ .spyproject
122
+
123
+ # Rope project settings
124
+ .ropeproject
125
+
126
+ # mkdocs documentation
127
+ /site
128
+
129
+ # mypy
130
+ .mypy_cache/
131
+ .dmypy.json
132
+ dmypy.json
133
+
134
+ # Pyre type checker
135
+ .pyre/
136
+
137
+ # IDE
138
+ .vscode/
139
+ .idea/
140
+
141
+ # macOS
142
+ .DS_Store
143
+
144
+ # Other
145
+ .cache/
146
+ activate_venv.sh
147
+ test-results/
148
+ test_notebooks/
149
+ publish.sh
150
+ catboost_info/
151
+ build/
152
+ playgroung.ipynb
153
+ fingerprint.js
154
+ envVars.txt
@@ -1,14 +1,13 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.1.279a2
3
+ Version: 1.1.279a2.dev1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
- Home-page: https://upgini.com/
6
- Author: Upgini Developers
7
- Author-email: madewithlove@upgini.com
8
- License: BSD 3-Clause License
9
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
+ Project-URL: Homepage, https://upgini.com/
10
7
  Project-URL: Source, https://github.com/upgini/upgini
11
- Keywords: data science,machine learning,data mining,automl,data search
8
+ Author-email: Upgini Developers <madewithlove@upgini.com>
9
+ License-File: LICENSE
10
+ Keywords: automl,data mining,data science,data search,machine learning
12
11
  Classifier: Development Status :: 5 - Production/Stable
13
12
  Classifier: Intended Audience :: Customer Service
14
13
  Classifier: Intended Audience :: Developers
@@ -23,22 +22,21 @@ Classifier: Programming Language :: Python :: 3.9
23
22
  Classifier: Programming Language :: Python :: 3.10
24
23
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
25
24
  Classifier: Topic :: Scientific/Engineering :: Information Analysis
26
- Requires-Python: >=3.8,<3.11
27
- Description-Content-Type: text/markdown
28
- License-File: LICENSE
29
- Requires-Dist: python-dateutil>=2.8.0
30
- Requires-Dist: requests>=2.8.0
31
- Requires-Dist: pandas<3.0.0,>=1.1.0
32
- Requires-Dist: numpy>=1.19.0
33
- Requires-Dist: scikit-learn>=1.3.0
34
- Requires-Dist: pydantic<2.0.0,>=1.8.2
35
- Requires-Dist: fastparquet>=0.8.1
36
- Requires-Dist: python-json-logger>=2.0.2
25
+ Requires-Python: <3.11,>=3.8
37
26
  Requires-Dist: catboost>=1.0.3
27
+ Requires-Dist: fastparquet>=0.8.1
28
+ Requires-Dist: ipywidgets>=8.1.0
38
29
  Requires-Dist: lightgbm>=3.3.2
30
+ Requires-Dist: numpy>=1.19.0
31
+ Requires-Dist: pandas<3.0.0,>=1.1.0
32
+ Requires-Dist: pydantic<2.0.0,>=1.8.2
39
33
  Requires-Dist: pyjwt>=2.8.0
34
+ Requires-Dist: python-dateutil>=2.8.0
35
+ Requires-Dist: python-json-logger>=2.0.2
36
+ Requires-Dist: requests>=2.8.0
37
+ Requires-Dist: scikit-learn>=1.3.0
40
38
  Requires-Dist: xhtml2pdf==0.2.11
41
- Requires-Dist: ipywidgets>=8.1.0
39
+ Description-Content-Type: text/markdown
42
40
 
43
41
 
44
42
  <!-- <h2 align="center"> <a href="https://upgini.com/">Upgini</a> : low-code feature search and enrichment library for machine learning </h2> -->
@@ -841,4 +839,4 @@ Some convenient ways to start contributing are:
841
839
  - [More perks for registered users](https://profile.upgini.com)
842
840
 
843
841
  <sup>😔 Found mistype or a bug in code snippet? Our bad! <a href="https://github.com/upgini/upgini/issues/new?assignees=&title=readme%2Fbug">
844
- Please report it here.</a></sup>
842
+ Please report it here.</a></sup>
@@ -0,0 +1,102 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "upgini"
7
+ dynamic = ["version"]
8
+ description = "Intelligent data search & enrichment for Machine Learning"
9
+ readme = "README.md"
10
+ requires-python = ">=3.8,<3.11"
11
+ authors = [
12
+ { name = "Upgini Developers", email = "madewithlove@upgini.com" },
13
+ ]
14
+ keywords = [
15
+ "automl",
16
+ "data mining",
17
+ "data science",
18
+ "data search",
19
+ "machine learning",
20
+ ]
21
+ classifiers = [
22
+ "Development Status :: 5 - Production/Stable",
23
+ "Intended Audience :: Customer Service",
24
+ "Intended Audience :: Developers",
25
+ "Intended Audience :: Financial and Insurance Industry",
26
+ "Intended Audience :: Information Technology",
27
+ "Intended Audience :: Science/Research",
28
+ "Intended Audience :: Telecommunications Industry",
29
+ "License :: OSI Approved :: BSD License",
30
+ "Operating System :: OS Independent",
31
+ "Programming Language :: Python :: 3.8",
32
+ "Programming Language :: Python :: 3.9",
33
+ "Programming Language :: Python :: 3.10",
34
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
35
+ "Topic :: Scientific/Engineering :: Information Analysis",
36
+ ]
37
+ dependencies = [
38
+ "catboost>=1.0.3",
39
+ "fastparquet>=0.8.1",
40
+ "ipywidgets>=8.1.0",
41
+ "lightgbm>=3.3.2",
42
+ "numpy>=1.19.0",
43
+ "pandas>=1.1.0,<3.0.0",
44
+ "pydantic>=1.8.2,<2.0.0",
45
+ "pyjwt>=2.8.0",
46
+ "python-dateutil>=2.8.0",
47
+ "python-json-logger>=2.0.2",
48
+ "requests>=2.8.0",
49
+ "scikit-learn>=1.3.0",
50
+ "xhtml2pdf==0.2.11",
51
+ ]
52
+
53
+ [project.urls]
54
+ "Bug Reports" = "https://github.com/upgini/upgini/issues"
55
+ Homepage = "https://upgini.com/"
56
+ Source = "https://github.com/upgini/upgini"
57
+
58
+ [tool.hatch.version]
59
+ path = "src/upgini/__about__.py"
60
+
61
+ [tool.hatch.build.targets.sdist]
62
+ include = [
63
+ "src"
64
+ ]
65
+
66
+ [tool.hatch.build.targets.wheel]
67
+ packages = [
68
+ "src/upgini"
69
+ ]
70
+
71
+ [tool.hatch.envs.default]
72
+ type = "virtual"
73
+ python = "3.10"
74
+
75
+ [tool.hatch.envs.test]
76
+ dependencies = [
77
+ "coverage[toml]",
78
+ "pytest",
79
+ "pytest-cov",
80
+ "requests-mock",
81
+ ]
82
+
83
+ [tool.hatch.envs.test.scripts]
84
+ cov = 'pytest --cov-report=term-missing --cov-config=pyproject.toml --cov=upgini --cov=tests {args}'
85
+ format = "black {args}"
86
+ lint = "ruff check {args}"
87
+
88
+ test_binary = 'pytest -s -vv tests/test_binary_dataset.py'
89
+
90
+ [[tool.hatch.envs.test.matrix]]
91
+ python = ["3.8", "3.9", "3.10"]
92
+
93
+ [tool.black]
94
+ line-length = 120
95
+
96
+ [tool.isort]
97
+ profile = "black"
98
+
99
+ [tool.pytest.ini_options]
100
+ pythonpath = [
101
+ "./src"
102
+ ]
@@ -0,0 +1 @@
1
+ __version__ = "1.1.279a2.dev1"
@@ -1,9 +1,11 @@
1
1
  import time
2
- from typing import Dict, Optional
3
2
  import uuid
3
+ from typing import Dict, Optional
4
+
5
+ import pandas as pd
6
+
4
7
  from upgini.http import get_rest_client
5
8
  from upgini.spinner import Spinner
6
- import pandas as pd
7
9
 
8
10
 
9
11
  class AdsManager:
@@ -1,9 +1,10 @@
1
1
  from typing import Dict
2
+
3
+ from upgini.autofe.binary import Add, Divide, Max, Min, Multiply, Sim, Subtract
2
4
  from upgini.autofe.date import DateDiff, DateDiffType2, DateListDiff, DateListDiffBounded
3
5
  from upgini.autofe.groupby import GroupByThenAgg, GroupByThenRank
4
6
  from upgini.autofe.operand import Operand
5
- from upgini.autofe.unary import Abs, Log, Residual, Sqrt, Square, Sigmoid, Floor, Freq
6
- from upgini.autofe.binary import Min, Max, Add, Subtract, Multiply, Divide, Sim
7
+ from upgini.autofe.unary import Abs, Floor, Freq, Log, Residual, Sigmoid, Sqrt, Square
7
8
  from upgini.autofe.vector import Mean, Sum
8
9
 
9
10
  ALL_OPERANDS: Dict[str, Operand] = {
@@ -1,9 +1,10 @@
1
- from upgini.autofe.operand import PandasOperand, VectorizableMixin
2
1
  import numpy as np
3
2
  import pandas as pd
4
3
  from numpy import dot
5
4
  from numpy.linalg import norm
6
5
 
6
+ from upgini.autofe.operand import PandasOperand, VectorizableMixin
7
+
7
8
 
8
9
  class Min(PandasOperand):
9
10
  name = "min"
@@ -1,8 +1,9 @@
1
1
  from typing import Any, Optional, Union
2
+
2
3
  import numpy as np
3
4
  import pandas as pd
4
- from pydantic import BaseModel
5
5
  from pandas.core.arrays.timedeltas import TimedeltaArray
6
+ from pydantic import BaseModel
6
7
 
7
8
  from upgini.autofe.operand import PandasOperand
8
9
 
@@ -215,7 +215,7 @@ class Feature:
215
215
  return Column(string)
216
216
 
217
217
  def is_trivial_char(c: str) -> bool:
218
- return not (c in "()+-*/,")
218
+ return c not in "()+-*/,"
219
219
 
220
220
  def find_prev(string: str) -> int:
221
221
  if string[-1] != ")":
@@ -1,7 +1,9 @@
1
- from upgini.autofe.operand import PandasOperand, VectorizableMixin
2
1
  from typing import Optional
2
+
3
3
  import pandas as pd
4
4
 
5
+ from upgini.autofe.operand import PandasOperand, VectorizableMixin
6
+
5
7
 
6
8
  class GroupByThenAgg(PandasOperand, VectorizableMixin):
7
9
  agg: Optional[str]
@@ -1,8 +1,9 @@
1
- from pydantic import BaseModel
2
- from typing import Dict, List, Optional, Tuple, Union
3
1
  import abc
4
- import pandas as pd
2
+ from typing import Dict, List, Optional, Tuple, Union
3
+
5
4
  import numpy as np
5
+ import pandas as pd
6
+ from pydantic import BaseModel
6
7
 
7
8
 
8
9
  class Operand(BaseModel):
@@ -1,7 +1,8 @@
1
- from upgini.autofe.operand import PandasOperand, VectorizableMixin
2
1
  import numpy as np
3
2
  import pandas as pd
4
3
 
4
+ from upgini.autofe.operand import PandasOperand, VectorizableMixin
5
+
5
6
 
6
7
  class Abs(PandasOperand, VectorizableMixin):
7
8
  name = "abs"
@@ -1,5 +1,7 @@
1
1
  from typing import List
2
+
2
3
  import pandas as pd
4
+
3
5
  from upgini.autofe.operand import PandasOperand, VectorizableMixin
4
6
 
5
7
 
@@ -15,9 +15,9 @@ from pandas.api.types import (
15
15
  is_float_dtype,
16
16
  is_integer_dtype,
17
17
  is_numeric_dtype,
18
+ is_object_dtype,
18
19
  is_period_dtype,
19
20
  is_string_dtype,
20
- is_object_dtype,
21
21
  )
22
22
 
23
23
  from upgini.errors import ValidationError
@@ -95,7 +95,7 @@ class Dataset: # (pd.DataFrame):
95
95
  data = pd.read_csv(path, **kwargs)
96
96
  else:
97
97
  # try different separators: , ; \t ...
98
- with open(path, mode="r") as csvfile:
98
+ with open(path) as csvfile:
99
99
  sep = csv.Sniffer().sniff(csvfile.read(2048)).delimiter
100
100
  kwargs["sep"] = sep
101
101
  data = pd.read_csv(path, **kwargs)
@@ -251,7 +251,7 @@ class Dataset: # (pd.DataFrame):
251
251
  @staticmethod
252
252
  def _ip_to_int(ip: Optional[_BaseAddress]) -> Optional[int]:
253
253
  try:
254
- if isinstance(ip, IPv4Address) or isinstance(ip, IPv6Address):
254
+ if isinstance(ip, (IPv4Address, IPv6Address)):
255
255
  return int(ip)
256
256
  except Exception:
257
257
  pass
@@ -259,7 +259,7 @@ class Dataset: # (pd.DataFrame):
259
259
  @staticmethod
260
260
  def _ip_to_int_str(ip: Optional[_BaseAddress]) -> Optional[str]:
261
261
  try:
262
- if isinstance(ip, IPv4Address) or isinstance(ip, IPv6Address):
262
+ if isinstance(ip, (IPv4Address, IPv6Address)):
263
263
  return str(int(ip))
264
264
  except Exception:
265
265
  pass
@@ -16,7 +16,7 @@ class UnauthorizedError(HttpError):
16
16
  """Unauthorized error from REST API."""
17
17
 
18
18
  def __init__(self, message, status_code):
19
- message = "Unauthorized, please check your authorization token ({})".format(message)
19
+ message = f"Unauthorized, please check your authorization token ({message})"
20
20
  super(UnauthorizedError, self).__init__(message, status_code)
21
21
 
22
22
 
@@ -2548,7 +2548,7 @@ class FeaturesEnricher(TransformerMixin):
2548
2548
  validated_X = X.copy()
2549
2549
  elif isinstance(X, pd.Series):
2550
2550
  validated_X = X.to_frame()
2551
- elif isinstance(X, np.ndarray) or isinstance(X, list):
2551
+ elif isinstance(X, (list, np.ndarray)):
2552
2552
  validated_X = pd.DataFrame(X)
2553
2553
  renaming = {c: str(c) for c in validated_X.columns}
2554
2554
  validated_X = validated_X.rename(columns=renaming)
@@ -2637,7 +2637,7 @@ class FeaturesEnricher(TransformerMixin):
2637
2637
  validated_eval_X = eval_X.copy()
2638
2638
  elif isinstance(eval_X, pd.Series):
2639
2639
  validated_eval_X = eval_X.to_frame()
2640
- elif isinstance(eval_X, np.ndarray) or isinstance(eval_X, list):
2640
+ elif isinstance(eval_X, (list, np.ndarray)):
2641
2641
  validated_eval_X = pd.DataFrame(eval_X)
2642
2642
  renaming = {c: str(c) for c in validated_eval_X.columns}
2643
2643
  validated_eval_X = validated_eval_X.rename(columns=renaming)
@@ -2819,7 +2819,7 @@ class FeaturesEnricher(TransformerMixin):
2819
2819
  )
2820
2820
 
2821
2821
  def sample(df):
2822
- if isinstance(df, pd.Series) or isinstance(df, pd.DataFrame):
2822
+ if isinstance(df, (pd.DataFrame, pd.Series)):
2823
2823
  return df.head(10)
2824
2824
  else:
2825
2825
  return df[:10]
@@ -3693,7 +3693,7 @@ class FeaturesEnricher(TransformerMixin):
3693
3693
  def sample(inp, sample_index):
3694
3694
  if _num_samples(inp) <= 1000:
3695
3695
  return inp
3696
- if isinstance(inp, pd.DataFrame) or isinstance(inp, pd.Series):
3696
+ if isinstance(inp, (pd.DataFrame, pd.Series)):
3697
3697
  return inp.sample(n=1000, random_state=random_state)
3698
3698
  if isinstance(inp, np.ndarray):
3699
3699
  return inp[sample_index]
@@ -22,6 +22,7 @@ from pydantic import BaseModel
22
22
  from pythonjsonlogger import jsonlogger
23
23
  from requests.exceptions import RequestException
24
24
 
25
+ from upgini.__about__ import __version__
25
26
  from upgini.errors import (
26
27
  HttpError,
27
28
  UnauthorizedError,
@@ -38,17 +39,17 @@ from upgini.metadata import (
38
39
  from upgini.resource_bundle import bundle
39
40
  from upgini.utils.track_info import get_track_metrics
40
41
 
41
- try:
42
- from importlib_metadata import version # type: ignore
42
+ # try:
43
+ # from importlib.metadata import version # type: ignore
43
44
 
44
- __version__ = version("upgini")
45
- except ImportError:
46
- try:
47
- from importlib.metadata import version # type: ignore
45
+ # __version__ = version("upgini")
46
+ # except ImportError:
47
+ # try:
48
+ # from importlib_metadata import version # type: ignore
48
49
 
49
- __version__ = version("upgini")
50
- except ImportError:
51
- __version__ = "Upgini wasn't installed"
50
+ # __version__ = version("upgini")
51
+ # except ImportError:
52
+ # __version__ = "Upgini wasn't installed"
52
53
 
53
54
  UPGINI_URL: str = "UPGINI_URL"
54
55
  UPGINI_API_KEY: str = "UPGINI_API_KEY"
@@ -925,7 +926,7 @@ def is_demo_api_key(api_token: Optional[str]) -> bool:
925
926
  return api_token is None or api_token == "" or api_token == DEMO_API_KEY
926
927
 
927
928
 
928
- @lru_cache()
929
+ @lru_cache
929
930
  def _get_rest_client(
930
931
  backend_url: str, api_token: str, client_ip: Optional[str] = None, client_visitorid: Optional[str] = None
931
932
  ) -> _RestClient:
@@ -1,15 +1,13 @@
1
- # -*- coding: utf-8 -*-
2
1
  """
3
2
  .. module: mdc
4
3
  .. moduleauthor:: Aljosha Friemann a.friemann@automate.wtf
5
4
  """
6
- from __future__ import absolute_import, division, print_function, unicode_literals
7
5
 
8
6
  import logging
9
7
 
10
- from upgini.mdc.context import new_log_context, get_mdc_fields
11
8
  from pythonjsonlogger import jsonlogger
12
9
 
10
+ from upgini.mdc.context import get_mdc_fields, new_log_context
13
11
 
14
12
  MDContext = new_log_context
15
13
  MDC = new_log_context
@@ -1,4 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
1
  """
3
2
  .. module: TODO
4
3
  :platform: TODO
@@ -7,12 +6,11 @@
7
6
  .. moduleauthor:: Aljosha Friemann a.friemann@automate.wtf
8
7
  """
9
8
 
10
- import time
11
- import uuid
9
+ import collections
12
10
  import logging
13
11
  import threading
14
- import collections
15
-
12
+ import time
13
+ import uuid
16
14
  from contextlib import contextmanager
17
15
 
18
16
  LOGGER = logging.getLogger(__name__)
@@ -32,7 +30,7 @@ def get_mdc_fields():
32
30
 
33
31
  @contextmanager
34
32
  def new_log_context(**kwargs):
35
- context_id = "mdc-{thread}-{context}".format(thread=threading.current_thread().ident, context=uuid.uuid4())
33
+ context_id = f"mdc-{threading.current_thread().ident}-{uuid.uuid4()}"
36
34
 
37
35
  LOGGER.debug("creating context %s", context_id)
38
36
 
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  from enum import Enum
2
4
  from typing import Dict, List, Optional, Set
3
5
 
@@ -201,6 +203,7 @@ class FileMetadata(BaseModel):
201
203
  for c in self.columns:
202
204
  if c.name == name:
203
205
  return c
206
+ return None
204
207
 
205
208
  def search_types(self) -> Set[SearchKey]:
206
209
  search_keys = set()