vaex-ml 0.18.3__py3-none-any.whl → 0.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vaex/ml/_version.py +2 -2
- vaex/ml/catboost.py +38 -39
- vaex/ml/lightgbm.py +9 -4
- vaex/ml/spec.json +1 -1
- vaex/ml/tensorflow.py +12 -11
- vaex/ml/transformations.py +2 -2
- vaex_ml-0.19.0.dist-info/METADATA +34 -0
- {vaex_ml-0.18.3.dist-info → vaex_ml-0.19.0.dist-info}/RECORD +12 -12
- {vaex_ml-0.18.3.dist-info → vaex_ml-0.19.0.dist-info}/WHEEL +1 -1
- {vaex_ml-0.18.3.dist-info → vaex_ml-0.19.0.dist-info}/entry_points.txt +0 -1
- vaex_ml-0.18.3.dist-info/METADATA +0 -20
- {vaex_ml-0.18.3.dist-info → vaex_ml-0.19.0.dist-info/licenses}/LICENSE.txt +0 -0
- {vaex_ml-0.18.3.dist-info → vaex_ml-0.19.0.dist-info}/top_level.txt +0 -0
vaex/ml/_version.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
__version__ = '0.18.3'
|
|
2
|
-
__version_tuple__ = (0, 18, 3)
|
|
1
|
+
__version__ = '0.19.0'
|
|
2
|
+
__version_tuple__ = (0, 19, 0)
|
vaex/ml/catboost.py
CHANGED
|
@@ -10,50 +10,49 @@ from . import generate
|
|
|
10
10
|
import numpy as np
|
|
11
11
|
import catboost
|
|
12
12
|
|
|
13
|
-
|
|
14
13
|
@vaex.serialize.register
|
|
15
14
|
@generate.register
|
|
16
15
|
class CatBoostModel(state.HasState):
|
|
17
16
|
'''The CatBoost algorithm.
|
|
18
17
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
18
|
+
This class provides an interface to the CatBoost algorithm.
|
|
19
|
+
CatBoost is a fast, scalable, high performance Gradient Boosting on
|
|
20
|
+
Decision Trees library, used for ranking, classification, regression and
|
|
21
|
+
other machine learning tasks. For more information please visit
|
|
22
|
+
https://github.com/catboost/catboost
|
|
23
|
+
|
|
24
|
+
Example:
|
|
25
|
+
|
|
26
|
+
>>> import vaex
|
|
27
|
+
>>> import vaex.ml.catboost
|
|
28
|
+
>>> df = vaex.datasets.iris()
|
|
29
|
+
>>> features = ['sepal_width', 'petal_length', 'sepal_length', 'petal_width']
|
|
30
|
+
>>> df_train, df_test = df.ml.train_test_split()
|
|
31
|
+
>>> params = {
|
|
32
|
+
'leaf_estimation_method': 'Gradient',
|
|
33
|
+
'learning_rate': 0.1,
|
|
34
|
+
'max_depth': 3,
|
|
35
|
+
'bootstrap_type': 'Bernoulli',
|
|
36
|
+
'objective': 'MultiClass',
|
|
37
|
+
'eval_metric': 'MultiClass',
|
|
38
|
+
'subsample': 0.8,
|
|
39
|
+
'random_state': 42,
|
|
40
|
+
'verbose': 0}
|
|
41
|
+
>>> booster = vaex.ml.catboost.CatBoostModel(features=features, target='class_', num_boost_round=100, params=params)
|
|
42
|
+
>>> booster.fit(df_train)
|
|
43
|
+
>>> df_train = booster.transform(df_train)
|
|
44
|
+
>>> df_train.head(3)
|
|
45
|
+
# sepal_length sepal_width petal_length petal_width class_ catboost_prediction
|
|
46
|
+
0 5.4 3 4.5 1.5 1 [0.00615039 0.98024259 0.01360702]
|
|
47
|
+
1 4.8 3.4 1.6 0.2 0 [0.99034267 0.00526382 0.0043935 ]
|
|
48
|
+
2 6.9 3.1 4.9 1.5 1 [0.00688241 0.95190908 0.04120851]
|
|
49
|
+
>>> df_test = booster.transform(df_test)
|
|
50
|
+
>>> df_test.head(3)
|
|
51
|
+
# sepal_length sepal_width petal_length petal_width class_ catboost_prediction
|
|
52
|
+
0 5.9 3 4.2 1.5 1 [0.00464228 0.98883351 0.00652421]
|
|
53
|
+
1 6.1 3 4.6 1.4 1 [0.00350424 0.9882139 0.00828186]
|
|
54
|
+
2 6.6 2.9 4.6 1.3 1 [0.00325705 0.98891631 0.00782664]
|
|
55
|
+
'''
|
|
57
56
|
snake_name = "catboost_model"
|
|
58
57
|
features = traitlets.List(traitlets.Unicode(), help='List of features to use when fitting the CatBoostModel.')
|
|
59
58
|
target = traitlets.Unicode(allow_none=False, help='The name of the target column.')
|
vaex/ml/lightgbm.py
CHANGED
|
@@ -86,7 +86,7 @@ class LightGBMModel(state.HasState):
|
|
|
86
86
|
copy.add_virtual_column(self.prediction_name, expression, unique=False)
|
|
87
87
|
return copy
|
|
88
88
|
|
|
89
|
-
def fit(self, df, valid_sets=None, valid_names=None, early_stopping_rounds=None, evals_result=None, verbose_eval=
|
|
89
|
+
def fit(self, df, valid_sets=None, valid_names=None, early_stopping_rounds=None, evals_result=None, verbose_eval=False, **kwargs):
|
|
90
90
|
"""Fit the LightGBMModel to the DataFrame.
|
|
91
91
|
|
|
92
92
|
The model will train until the validation score stops improving.
|
|
@@ -112,14 +112,19 @@ class LightGBMModel(state.HasState):
|
|
|
112
112
|
else:
|
|
113
113
|
valid_sets = ()
|
|
114
114
|
|
|
115
|
+
callbacks = [
|
|
116
|
+
lightgbm.callback.record_evaluation(eval_result=evals_result) if evals_result is not None else None,
|
|
117
|
+
lightgbm.callback.early_stopping(stopping_rounds=early_stopping_rounds) if early_stopping_rounds else None,
|
|
118
|
+
lightgbm.callback.log_evaluation() if verbose_eval else None
|
|
119
|
+
]
|
|
120
|
+
callbacks = [callback for callback in callbacks if callback is not None]
|
|
121
|
+
|
|
115
122
|
self.booster = lightgbm.train(params=self.params,
|
|
116
123
|
train_set=dtrain,
|
|
117
124
|
num_boost_round=self.num_boost_round,
|
|
118
125
|
valid_sets=valid_sets,
|
|
119
126
|
valid_names=valid_names,
|
|
120
|
-
|
|
121
|
-
evals_result=evals_result,
|
|
122
|
-
verbose_eval=verbose_eval,
|
|
127
|
+
callbacks=callbacks,
|
|
123
128
|
**kwargs)
|
|
124
129
|
|
|
125
130
|
def predict(self, df, **kwargs):
|
vaex/ml/spec.json
CHANGED
|
@@ -940,7 +940,7 @@
|
|
|
940
940
|
},
|
|
941
941
|
{
|
|
942
942
|
"classname": "CatBoostModel",
|
|
943
|
-
"doc": "The CatBoost algorithm.\n\
|
|
943
|
+
"doc": "The CatBoost algorithm.\n\nThis class provides an interface to the CatBoost algorithm.\nCatBoost is a fast, scalable, high performance Gradient Boosting on\nDecision Trees library, used for ranking, classification, regression and\nother machine learning tasks. For more information please visit\nhttps://github.com/catboost/catboost\n\nExample:\n\n>>> import vaex\n>>> import vaex.ml.catboost\n>>> df = vaex.datasets.iris()\n>>> features = ['sepal_width', 'petal_length', 'sepal_length', 'petal_width']\n>>> df_train, df_test = df.ml.train_test_split()\n>>> params = {\n 'leaf_estimation_method': 'Gradient',\n 'learning_rate': 0.1,\n 'max_depth': 3,\n 'bootstrap_type': 'Bernoulli',\n 'objective': 'MultiClass',\n 'eval_metric': 'MultiClass',\n 'subsample': 0.8,\n 'random_state': 42,\n 'verbose': 0}\n>>> booster = vaex.ml.catboost.CatBoostModel(features=features, target='class_', num_boost_round=100, params=params)\n>>> booster.fit(df_train)\n>>> df_train = booster.transform(df_train)\n>>> df_train.head(3)\n# sepal_length sepal_width petal_length petal_width class_ catboost_prediction\n0 5.4 3 4.5 1.5 1 [0.00615039 0.98024259 0.01360702]\n1 4.8 3.4 1.6 0.2 0 [0.99034267 0.00526382 0.0043935 ]\n2 6.9 3.1 4.9 1.5 1 [0.00688241 0.95190908 0.04120851]\n>>> df_test = booster.transform(df_test)\n>>> df_test.head(3)\n# sepal_length sepal_width petal_length petal_width class_ catboost_prediction\n0 5.9 3 4.2 1.5 1 [0.00464228 0.98883351 0.00652421]\n1 6.1 3 4.6 1.4 1 [0.00350424 0.9882139 0.00828186]\n2 6.6 2.9 4.6 1.3 1 [0.00325705 0.98891631 0.00782664]\n",
|
|
944
944
|
"module": "vaex.ml.catboost",
|
|
945
945
|
"snake_name": "catboost_model",
|
|
946
946
|
"traits": [
|
vaex/ml/tensorflow.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import base64
|
|
2
2
|
import tempfile
|
|
3
|
-
import
|
|
3
|
+
import os.path
|
|
4
4
|
|
|
5
5
|
import numpy as np
|
|
6
6
|
|
|
@@ -95,7 +95,10 @@ class DataFrameAccessorTensorflow():
|
|
|
95
95
|
chunk_shape = len(chunk[0].shape) + 1
|
|
96
96
|
transpose_order = np.arange(1, chunk_shape).tolist() + [0]
|
|
97
97
|
X = np.array(chunk[:-n_target_cols]).transpose(transpose_order)
|
|
98
|
-
|
|
98
|
+
if np.lib.NumpyVersion(np.__version__) >= '2.0.0':
|
|
99
|
+
y = np.array(chunk[-n_target_cols:]).T
|
|
100
|
+
else:
|
|
101
|
+
y = np.array(chunk[-n_target_cols:], copy=False).T
|
|
99
102
|
yield (X, y)
|
|
100
103
|
|
|
101
104
|
else:
|
|
@@ -177,13 +180,12 @@ class KerasModel(vaex.ml.state.HasState):
|
|
|
177
180
|
|
|
178
181
|
def state_get(self):
|
|
179
182
|
state = super(KerasModel, self).state_get()
|
|
180
|
-
|
|
181
183
|
with tempfile.TemporaryDirectory() as directory:
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
with open(zip_path, 'rb') as f:
|
|
184
|
+
filepath = os.path.join(directory, 'model.keras')
|
|
185
|
+
self.model.save(filepath)
|
|
186
|
+
with open(filepath, 'rb') as f:
|
|
186
187
|
data = f.read()
|
|
188
|
+
# store model as raw zip bytes base64 encoded
|
|
187
189
|
state['model'] = base64.encodebytes(data).decode('ascii')
|
|
188
190
|
return state
|
|
189
191
|
|
|
@@ -194,8 +196,7 @@ class KerasModel(vaex.ml.state.HasState):
|
|
|
194
196
|
|
|
195
197
|
data = base64.decodebytes(model_data.encode('ascii'))
|
|
196
198
|
with tempfile.TemporaryDirectory() as directory:
|
|
197
|
-
|
|
198
|
-
with open(
|
|
199
|
+
filepath = os.path.join(directory, 'model.keras')
|
|
200
|
+
with open(filepath, 'wb') as f:
|
|
199
201
|
f.write(data)
|
|
200
|
-
|
|
201
|
-
self.model = K.models.load_model(directory, custom_objects=self.custom_objects)
|
|
202
|
+
self.model = K.models.load_model(filepath, custom_objects=self.custom_objects)
|
vaex/ml/transformations.py
CHANGED
|
@@ -423,7 +423,7 @@ class OneHotEncoder(Transformer):
|
|
|
423
423
|
:rtype: DataFrame
|
|
424
424
|
'''
|
|
425
425
|
copy = df.copy()
|
|
426
|
-
downcast_uint8 = np.can_cast(self.one, np.uint8) and np.can_cast(self.zero, np.uint8)
|
|
426
|
+
downcast_uint8 = np.can_cast(np.min_scalar_type(self.one), np.uint8) and np.can_cast(np.min_scalar_type(self.zero), np.uint8)
|
|
427
427
|
dtype = 'uint8' if downcast_uint8 else None
|
|
428
428
|
# for each feature, add a virtual column for each unique entry
|
|
429
429
|
for i, feature in enumerate(self.features):
|
|
@@ -432,7 +432,7 @@ class OneHotEncoder(Transformer):
|
|
|
432
432
|
column_name = self.prefix + feature + '_' + str_value
|
|
433
433
|
if value is None:
|
|
434
434
|
copy[column_name] = copy.func.where(copy[feature].ismissing(), self.one, self.zero, dtype=dtype)
|
|
435
|
-
elif isinstance(value,
|
|
435
|
+
elif isinstance(value, float) and np.isnan(value):
|
|
436
436
|
copy[column_name] = copy.func.where(copy[feature].isnan(), self.one, self.zero, dtype=dtype)
|
|
437
437
|
else:
|
|
438
438
|
copy[column_name] = copy.func.where(copy[feature] == value, self.one, self.zero, dtype=dtype)
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: vaex-ml
|
|
3
|
+
Version: 0.19.0
|
|
4
|
+
Summary: Machine learning support for vaex
|
|
5
|
+
Home-page: https://www.github.com/vaexio/vaex
|
|
6
|
+
Author: Jovan Veljanoski
|
|
7
|
+
Author-email: jovan.veljanoski@gmail.com
|
|
8
|
+
License: MIT
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
License-File: LICENSE.txt
|
|
11
|
+
Requires-Dist: vaex-core~=4.8
|
|
12
|
+
Requires-Dist: numba
|
|
13
|
+
Requires-Dist: traitlets
|
|
14
|
+
Requires-Dist: jinja2
|
|
15
|
+
Requires-Dist: annoy
|
|
16
|
+
Requires-Dist: scikit-learn
|
|
17
|
+
Requires-Dist: xgboost
|
|
18
|
+
Requires-Dist: lightgbm~=4.0
|
|
19
|
+
Requires-Dist: catboost>=1.2.8
|
|
20
|
+
Provides-Extra: all
|
|
21
|
+
Requires-Dist: tensorflow~=2.18; extra == "all"
|
|
22
|
+
Requires-Dist: tensorflow<2.20.0; sys_platform == "darwin" and extra == "all"
|
|
23
|
+
Dynamic: author
|
|
24
|
+
Dynamic: author-email
|
|
25
|
+
Dynamic: description
|
|
26
|
+
Dynamic: description-content-type
|
|
27
|
+
Dynamic: home-page
|
|
28
|
+
Dynamic: license
|
|
29
|
+
Dynamic: license-file
|
|
30
|
+
Dynamic: provides-extra
|
|
31
|
+
Dynamic: requires-dist
|
|
32
|
+
Dynamic: summary
|
|
33
|
+
|
|
34
|
+
Machine learning support for vaex
|
|
@@ -1,27 +1,27 @@
|
|
|
1
1
|
vaex/ml/__init__.py,sha256=cg8TmMc1KEJnG9YknEN8VpJ4ikEMcpqnrvxnwMR9NT4,4263
|
|
2
|
-
vaex/ml/_version.py,sha256=
|
|
3
|
-
vaex/ml/catboost.py,sha256=
|
|
2
|
+
vaex/ml/_version.py,sha256=xB41s6qdt06iHS6CO_taADXuaLN_zxOmjAJox-9eQAk,54
|
|
3
|
+
vaex/ml/catboost.py,sha256=gFdDfzCKKUqXTf383PNhINOtI3HdWU_0S33tbSWRZa8,10167
|
|
4
4
|
vaex/ml/cluster.py,sha256=SXnEB1tZZFstC6lqqPI-Y9UjUTaAUYEKJCMsi59Jp-M,10295
|
|
5
5
|
vaex/ml/generate.py,sha256=ZH3ekTUmmq74adiJLT29aA8rsICQyGw4pCddkGnF2nQ,2762
|
|
6
|
-
vaex/ml/lightgbm.py,sha256=
|
|
6
|
+
vaex/ml/lightgbm.py,sha256=vcANvNpK0TYNPOFXbMsCg04Uhl4_AWO0rZQZvzCAKQc,7908
|
|
7
7
|
vaex/ml/linear_model.py,sha256=tx55oPrSfbvH2VwOJ_j5pUPyzsDTOgBlEjfVBZOaf2E,4055
|
|
8
8
|
vaex/ml/metrics.py,sha256=gFpxpE9eNxuF-09Q9X__Tz5VZ_v51Rr0Zod9VtCDxTg,20416
|
|
9
9
|
vaex/ml/pipeline.py,sha256=61w3SINePdyZHbyk1q0T1lNQ3W4wMV56sjCratBEAwU,1001
|
|
10
10
|
vaex/ml/sklearn.py,sha256=AKhkzSs1XLi6VPPCQ77xkpOX8EobgQzBgiPUrbuhddo,11671
|
|
11
|
-
vaex/ml/spec.json,sha256=
|
|
11
|
+
vaex/ml/spec.json,sha256=1-kdehd6auQHVeBoKG2TCne14MPoIitvKrfgFpBh0NE,70870
|
|
12
12
|
vaex/ml/spec.py,sha256=24jkgxIGzzNhsxmir1bIltZQ8ASw-lBReeXF1TyYh_o,1350
|
|
13
13
|
vaex/ml/state.py,sha256=0g59xmYuJeRwBLWa0YiC9QqAdx2aaL6EG9oZoQ1E7HU,1570
|
|
14
|
-
vaex/ml/tensorflow.py,sha256=
|
|
15
|
-
vaex/ml/transformations.py,sha256=
|
|
14
|
+
vaex/ml/tensorflow.py,sha256=4xzBgxwUUSgK59OEoezu7DhvLAi9kkt_BjwwmGU9C9A,9146
|
|
15
|
+
vaex/ml/transformations.py,sha256=xJ_ZM9SArDCXnv1NX2Y_RRPfzUHsE2NPvQ_aDCjCxGA,51329
|
|
16
16
|
vaex/ml/ui.py,sha256=Zg9PWISoUTwRBIaM-4CuezVzIdpSegxNik0DYQJZVEQ,1902
|
|
17
17
|
vaex/ml/xgboost.py,sha256=XxgzKBRiUTDbkaCxe4Ck5seLG1rlk7u7lLIa_uvjojU,7031
|
|
18
18
|
vaex/ml/incubator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
19
19
|
vaex/ml/incubator/annoy.py,sha256=ZhxT7_tqeZztOmmv29aphRsITbka2ZTgq5bY0YufnhI,3336
|
|
20
20
|
vaex/ml/incubator/pygbm.py,sha256=QQsbYXZ6HaYl5K1i80S3lHtdVMbo2pWKdwMEYg1EqPg,5033
|
|
21
21
|
vaex/ml/incubator/river.py,sha256=1L6_vfRZY1FubyxXbeF_YKtEbq3heafpG2E3SbL8AAY,6106
|
|
22
|
-
vaex_ml-0.
|
|
23
|
-
vaex_ml-0.
|
|
24
|
-
vaex_ml-0.
|
|
25
|
-
vaex_ml-0.
|
|
26
|
-
vaex_ml-0.
|
|
27
|
-
vaex_ml-0.
|
|
22
|
+
vaex_ml-0.19.0.dist-info/licenses/LICENSE.txt,sha256=Ub-0nSwxfezlH092uDbwZqynm9i5wq353cMYNPy5OqY,1087
|
|
23
|
+
vaex_ml-0.19.0.dist-info/METADATA,sha256=ry6S6Y3nMmKeX37dJQTiwgSTEuz9t_2WoUU3ckc-cfo,912
|
|
24
|
+
vaex_ml-0.19.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
25
|
+
vaex_ml-0.19.0.dist-info/entry_points.txt,sha256=cJ8A2uhjPndjLHCMesv0AcKsKK8oCbZclKR3c8yn4Lo,176
|
|
26
|
+
vaex_ml-0.19.0.dist-info/top_level.txt,sha256=BTcHbKEpfXqVlITrjFEtm95UDejFyYXslGl9YUqTIpc,5
|
|
27
|
+
vaex_ml-0.19.0.dist-info/RECORD,,
|
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: vaex-ml
|
|
3
|
-
Version: 0.18.3
|
|
4
|
-
Summary: Machine learning support for vaex
|
|
5
|
-
Home-page: https://www.github.com/vaexio/vaex
|
|
6
|
-
Author: Jovan Veljanoski
|
|
7
|
-
Author-email: jovan.veljanoski@gmail.com
|
|
8
|
-
License: MIT
|
|
9
|
-
Platform: UNKNOWN
|
|
10
|
-
Requires-Dist: vaex-core (<5,>=4.8.0)
|
|
11
|
-
Requires-Dist: numba
|
|
12
|
-
Requires-Dist: traitlets
|
|
13
|
-
Requires-Dist: jinja2
|
|
14
|
-
Provides-Extra: all
|
|
15
|
-
Requires-Dist: tensorflow (>=2.1.0) ; extra == 'all'
|
|
16
|
-
Requires-Dist: tensorflow-io (>=0.12.0) ; extra == 'all'
|
|
17
|
-
|
|
18
|
-
UNKNOWN
|
|
19
|
-
|
|
20
|
-
|
|
File without changes
|
|
File without changes
|