triggerflow 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- trigger_dataset/__init__.py +0 -0
- trigger_dataset/core.py +88 -0
- trigger_loader/__init__.py +0 -0
- trigger_loader/cluster_manager.py +107 -0
- trigger_loader/loader.py +154 -0
- trigger_loader/processor.py +212 -0
- triggerflow/__init__.py +0 -0
- triggerflow/cli.py +122 -0
- triggerflow/core.py +617 -0
- triggerflow/interfaces/__init__.py +0 -0
- triggerflow/interfaces/uGT.py +187 -0
- triggerflow/mlflow_wrapper.py +270 -0
- triggerflow/starter/.gitignore +143 -0
- triggerflow/starter/README.md +0 -0
- triggerflow/starter/cookiecutter.json +5 -0
- triggerflow/starter/prompts.yml +9 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/.dvcignore +3 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/.gitignore +143 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/.gitlab-ci.yml +56 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/README.md +29 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/README.md +26 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/catalog.yml +84 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/parameters.yml +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/parameters_compile.yml +14 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/parameters_data_processing.yml +8 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/parameters_load_data.yml +5 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/parameters_model_training.yml +9 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/parameters_model_validation.yml +5 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/catalog.yml +90 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/parameters.yml +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/parameters_compile.yml +14 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/parameters_data_processing.yml +8 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/parameters_load_data.yml +5 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/parameters_model_training.yml +9 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/parameters_model_validation.yml +5 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/logging.yml +43 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/01_raw/.gitkeep +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/01_raw/condor_config.json +11 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/01_raw/cuda_config.json +4 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/01_raw/samples.json +24 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/01_raw/settings.json +8 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/01_raw/test.root +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/02_loaded/.gitkeep +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/03_preprocessed/.gitkeep +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/04_models/.gitkeep +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/05_validation/.gitkeep +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/06_compile/.gitkeep +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/07_reporting/.gitkeep +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/dvc.yaml +7 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/environment.yml +23 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/pyproject.toml +50 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/__init__.py +3 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/__main__.py +25 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/datasets/any_object.py +20 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/datasets/base_dataset.py +137 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/datasets/base_loader.py +101 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/datasets/meta_dataset.py +49 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/datasets/{{ cookiecutter.python_package }}_dataset.py +35 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/datasets/{{ cookiecutter.python_package }}_loader.py +32 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/models/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/models/base_model.py +155 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/models/{{ cookiecutter.python_package }}_model.py +16 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipeline_registry.py +17 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/compile/__init__.py +10 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/compile/nodes.py +70 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/compile/pipeline.py +20 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_processing/__init__.py +10 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_processing/nodes.py +41 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_processing/pipeline.py +28 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/load_data/__init__.py +10 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/load_data/nodes.py +13 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/load_data/pipeline.py +20 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/model_training/__init__.py +10 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/model_training/nodes.py +48 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/model_training/pipeline.py +24 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/model_validation/__init__.py +10 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/model_validation/nodes.py +31 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/model_validation/pipeline.py +24 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/settings.py +46 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/utils/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/utils/metric.py +4 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/utils/plotting.py +598 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/compile/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/compile/test_pipeline.py +9 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/data_processing/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/data_processing/test_pipeline.py +9 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/load_data/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/load_data/test_pipeline.py +9 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/model_training/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/model_training/test_pipeline.py +9 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/model_validation/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/model_validation/test_pipeline.py +9 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/test_run.py +27 -0
- triggerflow/templates/build_ugt.tcl +46 -0
- triggerflow/templates/data_types.h +524 -0
- triggerflow/templates/makefile +28 -0
- triggerflow/templates/makefile_version +15 -0
- triggerflow/templates/model-gt.cpp +104 -0
- triggerflow/templates/model_template.cpp +63 -0
- triggerflow/templates/scales.h +20 -0
- triggerflow-0.3.4.dist-info/METADATA +206 -0
- triggerflow-0.3.4.dist-info/RECORD +107 -0
- triggerflow-0.3.4.dist-info/WHEEL +5 -0
- triggerflow-0.3.4.dist-info/entry_points.txt +2 -0
- triggerflow-0.3.4.dist-info/top_level.txt +3 -0
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This is a boilerplate pipeline 'compile'
|
|
3
|
+
generated using Kedro 1.0.0
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import logging
|
|
7
|
+
import mlflow
|
|
8
|
+
from mlflow.tracking import MlflowClient
|
|
9
|
+
import os
|
|
10
|
+
import numpy as np
|
|
11
|
+
import pandas as pd
|
|
12
|
+
from triggerflow.core import TriggerModel
|
|
13
|
+
from triggerflow.mlflow_wrapper import log_model
|
|
14
|
+
from sklearn.metrics import roc_auc_score
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def compile_model(
    model, X_test: pd.DataFrame, y_test: pd.DataFrame
) -> list:
    """Compile the trained model into a TriggerModel and cross-check its outputs.

    Wraps the native model in a ``TriggerModel``, saves the compiled archive,
    runs the software and firmware backends on the test set, persists both
    outputs, and compares them via ROC AUC.

    Args:
        model: Trained native model to wrap and compile.
        X_test: Test features.
        y_test: Test labels used for the ROC AUC cross-check.

    Returns:
        ``[auc_software, auc_firmware]`` — ROC AUC of the software and
        firmware predictions, respectively.
    """
    # get logger for reporting
    logger = logging.getLogger(__name__)

    # NOTE(review): offsets/shifts are sized by X_test.shape[0] (number of
    # rows). If scales are meant to be per-feature this should be
    # X_test.shape[1] — confirm against TriggerModel's expectations.
    scales = {
        'offsets': [int(x) for x in np.ones(X_test.shape[0])],
        'shifts': [int(x) for x in np.ones(X_test.shape[0])]
    }

    trigger_model = TriggerModel(
        config="trigger_model_config.yaml",
        native_model=model,
        scales=scales
    )
    trigger_model()

    trigger_model.save("triggermodel.tar.xz")

    # Backends expect a C-contiguous numpy array.
    X = np.ascontiguousarray(X_test.values)

    output_software = trigger_model.software_predict(X)
    output_firmware = trigger_model.firmware_predict(X)
    try:
        # QONNX representation is optional; running it is only a smoke test,
        # so the result is intentionally discarded.
        trigger_model.qonnx_predict(X)
    except Exception:  # narrowed from bare `except:` — don't swallow SystemExit/KeyboardInterrupt
        logger.info('QONNX representation not found')

    outdir = "data/06_compile"
    # Robustness: make sure the output folder exists before writing.
    os.makedirs(outdir, exist_ok=True)

    np.save(os.path.join(outdir, "output_software.npy"), np.array(output_software))
    np.save(os.path.join(outdir, "output_firmware.npy"), np.array(output_firmware))

    auc_software = roc_auc_score(y_test, output_software)
    auc_firmware = roc_auc_score(y_test, output_firmware)

    logger.info(f"Area under ROC curve Software: {auc_software:.4f}")
    logger.info(f"Area under ROC curve Firmware: {auc_firmware:.4f}")

    return [auc_software, auc_firmware]
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This is a boilerplate pipeline 'compile'
|
|
3
|
+
generated using Kedro 1.0.0
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from kedro.pipeline import node, Pipeline, pipeline # noqa
|
|
7
|
+
from .nodes import compile_model
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def create_pipeline(**kwargs) -> Pipeline:
    """Assemble the 'compile' pipeline: a single node running compile_model."""
    compile_node = node(
        func=compile_model,
        inputs=[
            "train_model",
            "processed_{{ cookiecutter.python_package }}_X_test",
            "processed_{{ cookiecutter.python_package }}_y_test",
        ],
        outputs="model_aucs",
        name="compile_model_node",
    )
    return pipeline([compile_node])
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This is a boilerplate pipeline 'data_processing'
|
|
3
|
+
generated using Kedro 1.0.0
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import pandas as pd
|
|
7
|
+
from sklearn.model_selection import train_test_split
|
|
8
|
+
from sklearn.preprocessing import StandardScaler
|
|
9
|
+
import mlflow
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def data_processing(
    data: pd.DataFrame, random_state: int, test_size: float
) -> tuple:
    """Standardise the features and split the data into train and test sets.

    Args:
        data: Raw data; must contain a ``y`` label column and an ``event``
            id column, all other columns are treated as features.
        random_state: Seed forwarded to ``train_test_split`` for reproducibility.
        test_size: Fraction of rows held out for the test set.

    Returns:
        A 7-tuple, in this order:
            X_train: Scaled training features.
            X_test: Scaled test features.
            y_train: Training labels.
            y_test: Test labels.
            scaler: The fitted ``StandardScaler`` (needed to transform new data).
            ids_train: Event ids of the training rows.
            ids_test: Event ids of the test rows.
    """
    y = data["y"].to_frame()
    event_ids = data["event"].to_frame()
    X = data.drop(columns=["y", "event"])

    # Normalize features
    scaler = StandardScaler()
    X_scaled = pd.DataFrame(scaler.fit_transform(X), columns=X.columns, index=X.index)

    # Split into training and test sets
    X_train, X_test, y_train, y_test, ids_train, ids_test = train_test_split(
        X_scaled, y, event_ids, test_size=test_size, random_state=random_state
    )

    return X_train, X_test, y_train, y_test, scaler, ids_train, ids_test
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This is a boilerplate pipeline 'data_processing'
|
|
3
|
+
generated using Kedro 1.0.0
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from kedro.pipeline import node, Pipeline, pipeline # noqa
|
|
7
|
+
from .nodes import data_processing
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def create_pipeline(**kwargs) -> Pipeline:
    """Assemble the 'data_processing' pipeline: one node wrapping data_processing."""
    processing_node = node(
        func=data_processing,
        inputs=[
            "{{ cookiecutter.python_package }}_data_loaded",
            "params:random_state",
            "params:test_size",
        ],
        outputs=[
            "processed_{{ cookiecutter.python_package }}_X_train",
            "processed_{{ cookiecutter.python_package }}_X_test",
            "processed_{{ cookiecutter.python_package }}_y_train",
            "processed_{{ cookiecutter.python_package }}_y_test",
            "scaler",
            "event_ids_train",
            "event_ids_test",
        ],
        name="data_processing_{{ cookiecutter.python_package }}_node",
    )
    return pipeline([processing_node])
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This is a boilerplate pipeline 'model_training'
|
|
3
|
+
generated using Kedro 1.0.0
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import logging
|
|
7
|
+
import pandas as pd
|
|
8
|
+
from glob import glob
|
|
9
|
+
import mlflow
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def load_data({{ cookiecutter.python_package }}_data: pd.DataFrame, meta_data: dict) -> list[dict, pd.DataFrame]:
|
|
13
|
+
return {{ cookiecutter.python_package }}_data, meta_data
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This is a boilerplate pipeline 'model_training'
|
|
3
|
+
generated using Kedro 1.0.0
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from kedro.pipeline import node, Pipeline, pipeline # noqa
|
|
7
|
+
from .nodes import load_data
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def create_pipeline(**kwargs) -> Pipeline:
|
|
11
|
+
return pipeline(
|
|
12
|
+
[
|
|
13
|
+
node(
|
|
14
|
+
func=load_data,
|
|
15
|
+
inputs=["{{ cookiecutter.python_package }}_loader", "{{ cookiecutter.python_package }}_meta_data"],
|
|
16
|
+
outputs=["{{ cookiecutter.python_package }}_data_loaded", "{{ cookiecutter.python_package }}_meta_data_loaded"],
|
|
17
|
+
name="load_data",
|
|
18
|
+
)
|
|
19
|
+
]
|
|
20
|
+
)
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This is a boilerplate pipeline 'model_training'
|
|
3
|
+
generated using Kedro 1.0.0
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import pandas as pd
|
|
7
|
+
import mlflow
|
|
8
|
+
from tensorflow.keras.models import Sequential
|
|
9
|
+
from tensorflow.keras.layers import Dense
|
|
10
|
+
from tensorflow.keras.optimizers import Adam
|
|
11
|
+
from demo_pipeline.utils.plotting import plotTrainingHistory
|
|
12
|
+
from {{ cookiecutter.python_package }}.utils.plotting import plotTrainingHistory, get_dummy
|
|
13
|
+
from {{ cookiecutter.python_package }}.models.{{ cookiecutter.python_package }}_model import {{ cookiecutter.python_package }}
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def train_model(
|
|
18
|
+
X_train: pd.DataFrame, y_train: pd.DataFrame, params: dict
|
|
19
|
+
):
|
|
20
|
+
"""Trains a simple Keras model on the data.
|
|
21
|
+
|
|
22
|
+
Args:
|
|
23
|
+
X_train: Training features.
|
|
24
|
+
y_train: Training labels.
|
|
25
|
+
params: Dictionary containing hyperparameters like 'learning_rate', 'epochs', 'batch_size', 'n_hidden', 'name'.
|
|
26
|
+
|
|
27
|
+
Returns:
|
|
28
|
+
Trained Keras model.
|
|
29
|
+
"""
|
|
30
|
+
n_inputs = X_train.shape[1]
|
|
31
|
+
n_hidden = params["hps"].get("n_hidden", 32)
|
|
32
|
+
learning_rate = params["hps"].get("learning_rate", 0.001)
|
|
33
|
+
epochs = params["hps"].get("epochs", 10)
|
|
34
|
+
batch_size = params["hps"].get("batch_size", 32)
|
|
35
|
+
|
|
36
|
+
model = Sequential([
|
|
37
|
+
Dense(n_hidden, input_shape=(n_inputs,), activation="relu"),
|
|
38
|
+
Dense(1, activation="linear") # adjust activation for classification if needed
|
|
39
|
+
])
|
|
40
|
+
model.compile(optimizer=Adam(learning_rate=learning_rate), loss="mse", metrics=["mae"])
|
|
41
|
+
|
|
42
|
+
history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=0)
|
|
43
|
+
|
|
44
|
+
# Optional: plot training history
|
|
45
|
+
# f, _ = plotTrainingHistory(history)
|
|
46
|
+
|
|
47
|
+
return model
|
|
48
|
+
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This is a boilerplate pipeline 'model_training'
|
|
3
|
+
generated using Kedro 1.0.0
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from kedro.pipeline import node, Pipeline, pipeline # noqa
|
|
7
|
+
from .nodes import train_model
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def create_pipeline(**kwargs) -> Pipeline:
    """Assemble the 'model_training' pipeline around the train_model node."""
    training_node = node(
        func=train_model,
        inputs=[
            "processed_{{ cookiecutter.python_package }}_X_train",
            "processed_{{ cookiecutter.python_package }}_y_train",
            "params:{{ cookiecutter.python_package }}_model",
        ],
        outputs="train_model",
        name="train_model_node",
    )
    return pipeline([training_node])
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This is a boilerplate pipeline 'model_validation'
|
|
3
|
+
generated using Kedro 1.0.0
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import logging
|
|
7
|
+
import pandas as pd
|
|
8
|
+
from sklearn.metrics import roc_auc_score
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def validated_model(model, X_test: pd.DataFrame, y_test: pd.DataFrame) -> pd.DataFrame:
    """Validate a trained model on the held-out test set.

    Runs inference once, logs the ROC AUC, and returns the per-row scores.

    Args:
        model: Trained model exposing a ``predict`` method.
        X_test: Test features.
        y_test: True test labels.

    Returns:
        DataFrame with a single ``prediction`` column holding the model's
        scores (last output column).
    """
    # get logger for reporting
    logger = logging.getLogger(__name__)

    # Predict once and reuse the result: the original called model.predict
    # twice with identical arguments, doubling inference cost for no benefit.
    proba = model.predict(X_test)
    auc = roc_auc_score(y_test, proba)

    logger.info(f"Area under ROC curve: {auc:.4f}")

    # assumes predict returns a 2-D array; the last column is the score — TODO confirm
    return pd.DataFrame({"prediction": proba[:, -1]})
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This is a boilerplate pipeline 'model_validation'
|
|
3
|
+
generated using Kedro 1.0.0
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from kedro.pipeline import node, Pipeline, pipeline # noqa
|
|
7
|
+
from .nodes import validated_model
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def create_pipeline(**kwargs) -> Pipeline:
    """Assemble the 'model_validation' pipeline around the validated_model node."""
    validation_node = node(
        func=validated_model,
        inputs=[
            "train_model",
            "processed_{{ cookiecutter.python_package }}_X_test",
            "processed_{{ cookiecutter.python_package }}_y_test",
        ],
        outputs="model_pred",
        name="validated_model_node",
    )
    return pipeline([validation_node])
|
triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/settings.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Project settings. There is no need to edit this file unless you want to change values
|
|
2
|
+
from the Kedro defaults. For further information, including these default values, see
|
|
3
|
+
https://docs.kedro.org/en/stable/kedro_project_setup/settings.html."""
|
|
4
|
+
|
|
5
|
+
# Instantiated project hooks.
|
|
6
|
+
# For example, after creating a hooks.py and defining a ProjectHooks class there, do
|
|
7
|
+
# from {{ cookiecutter.python_package }}.hooks import ProjectHooks
|
|
8
|
+
# Hooks are executed in a Last-In-First-Out (LIFO) order.
|
|
9
|
+
# HOOKS = (ProjectHooks(),)
|
|
10
|
+
|
|
11
|
+
# Installed plugins for which to disable hook auto-registration.
|
|
12
|
+
# DISABLE_HOOKS_FOR_PLUGINS = ("kedro-viz",)
|
|
13
|
+
|
|
14
|
+
# Class that manages storing KedroSession data.
|
|
15
|
+
# from kedro.framework.session.store import BaseSessionStore
|
|
16
|
+
# SESSION_STORE_CLASS = BaseSessionStore
|
|
17
|
+
# Keyword arguments to pass to the `SESSION_STORE_CLASS` constructor.
|
|
18
|
+
# SESSION_STORE_ARGS = {
|
|
19
|
+
# "path": "./sessions"
|
|
20
|
+
# }
|
|
21
|
+
|
|
22
|
+
# Directory that holds configuration.
|
|
23
|
+
# CONF_SOURCE = "conf"
|
|
24
|
+
|
|
25
|
+
# Class that manages how configuration is loaded.
|
|
26
|
+
# from kedro.config import OmegaConfigDataset
|
|
27
|
+
|
|
28
|
+
# CONFIG_DATASET_CLASS = OmegaConfigDataset
|
|
29
|
+
|
|
30
|
+
# Keyword arguments to pass to the `CONFIG_DATASET_CLASS` constructor.
|
|
31
|
+
CONFIG_DATASET_ARGS = {
|
|
32
|
+
"base_env": "base",
|
|
33
|
+
"default_run_env": "local",
|
|
34
|
+
# "config_patterns": {
|
|
35
|
+
# "spark" : ["spark*/"],
|
|
36
|
+
# "parameters": ["parameters*", "parameters*/**", "**/parameters*"],
|
|
37
|
+
# }
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
# Class that manages Kedro's library components.
|
|
41
|
+
# from kedro.framework.context import KedroContext
|
|
42
|
+
# CONTEXT_CLASS = KedroContext
|
|
43
|
+
|
|
44
|
+
# Class that manages the Data Catalog.
|
|
45
|
+
# from kedro.io import DataCatalog
|
|
46
|
+
# DATA_CATALOG_CLASS = DataCatalog
|
|
File without changes
|