triggerflow 0.1.12__py3-none-any.whl → 0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- trigger_dataset/__init__.py +0 -0
- trigger_dataset/core.py +88 -0
- trigger_loader/__init__.py +0 -0
- trigger_loader/cluster_manager.py +107 -0
- trigger_loader/loader.py +95 -0
- trigger_loader/processor.py +211 -0
- triggerflow/cli.py +122 -0
- triggerflow/core.py +118 -114
- triggerflow/mlflow_wrapper.py +54 -49
- triggerflow/starter/.gitignore +143 -0
- triggerflow/starter/README.md +0 -0
- triggerflow/starter/cookiecutter.json +5 -0
- triggerflow/starter/prompts.yml +9 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/.dvcignore +3 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/.gitignore +143 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/.gitlab-ci.yml +56 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/README.md +29 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/README.md +26 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/catalog.yml +84 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/parameters.yml +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/parameters_compile.yml +14 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/parameters_data_processing.yml +8 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/parameters_load_data.yml +5 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/parameters_model_training.yml +9 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/parameters_model_validation.yml +5 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/catalog.yml +84 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/parameters.yml +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/parameters_compile.yml +14 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/parameters_data_processing.yml +8 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/parameters_load_data.yml +5 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/parameters_model_training.yml +9 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/parameters_model_validation.yml +5 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/logging.yml +43 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/01_raw/.gitkeep +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/01_raw/samples.json +15 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/01_raw/samples_dummy.json +26 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/02_loaded/.gitkeep +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/03_preprocessed/.gitkeep +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/04_models/.gitkeep +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/05_validation/.gitkeep +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/06_compile/.gitkeep +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/07_reporting/.gitkeep +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/dvc.yaml +7 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/environment.yml +21 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/pyproject.toml +50 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/__init__.py +3 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/__main__.py +25 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/datasets/any_object.py +20 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/datasets/base_dataset.py +137 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/datasets/meta_dataset.py +88 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/datasets/{{ cookiecutter.python_package }}_dataset.py +35 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/models/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/models/base_model.py +155 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/models/{{ cookiecutter.python_package }}_model.py +16 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipeline_registry.py +17 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/compile/__init__.py +10 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/compile/nodes.py +50 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/compile/pipeline.py +10 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_processing/__init__.py +10 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_processing/nodes.py +40 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_processing/pipeline.py +28 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/load_data/__init__.py +10 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/load_data/nodes.py +12 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/load_data/pipeline.py +20 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/model_training/__init__.py +10 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/model_training/nodes.py +31 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/model_training/pipeline.py +24 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/model_validation/__init__.py +10 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/model_validation/nodes.py +29 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/model_validation/pipeline.py +24 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/settings.py +46 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/utils/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/utils/metric.py +4 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/utils/plotting.py +598 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/compile/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/compile/test_pipeline.py +9 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/data_processing/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/data_processing/test_pipeline.py +9 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/load_data/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/load_data/test_pipeline.py +9 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/model_training/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/model_training/test_pipeline.py +9 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/model_validation/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/model_validation/test_pipeline.py +9 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/test_run.py +27 -0
- triggerflow-0.2.dist-info/METADATA +97 -0
- triggerflow-0.2.dist-info/RECORD +97 -0
- triggerflow-0.2.dist-info/entry_points.txt +2 -0
- triggerflow-0.2.dist-info/top_level.txt +3 -0
- triggerflow-0.1.12.dist-info/METADATA +0 -61
- triggerflow-0.1.12.dist-info/RECORD +0 -11
- triggerflow-0.1.12.dist-info/top_level.txt +0 -1
- {triggerflow-0.1.12.dist-info → triggerflow-0.2.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
{{ cookiecutter.python_package }}_meta_data:
|
|
2
|
+
filepath: data/01_raw/samples_dummy.json
|
|
3
|
+
sample_key: samples
|
|
4
|
+
type: {{ cookiecutter.python_package }}.datasets.meta_dataset.MetaDataset
|
|
5
|
+
|
|
6
|
+
{{ cookiecutter.python_package }}_data:
|
|
7
|
+
sample_info: data/01_raw/samples_dummy.json
|
|
8
|
+
sample_key: samples
|
|
9
|
+
type: {{ cookiecutter.python_package }}.datasets.{{ cookiecutter.python_package }}_dataset.{{ cookiecutter.project_name }}Dataset
|
|
10
|
+
|
|
11
|
+
{{ cookiecutter.python_package }}_meta_data_loaded:
|
|
12
|
+
filepath: data/02_loaded/{{ cookiecutter.python_package }}_meta_data.json
|
|
13
|
+
sample_key: samples
|
|
14
|
+
type: {{ cookiecutter.python_package }}.datasets.meta_dataset.MetaDataset
|
|
15
|
+
|
|
16
|
+
{{ cookiecutter.python_package }}_data_loaded:
|
|
17
|
+
filepath: data/02_loaded/{{ cookiecutter.python_package }}_data.csv
|
|
18
|
+
save_args:
|
|
19
|
+
index: False
|
|
20
|
+
sep: ','
|
|
21
|
+
type: pandas.CSVDataset
|
|
22
|
+
|
|
23
|
+
processed_{{ cookiecutter.python_package }}_X_train:
|
|
24
|
+
filepath: data/03_preprocessed/processed_{{ cookiecutter.python_package }}_X_train.csv
|
|
25
|
+
save_args:
|
|
26
|
+
index: False
|
|
27
|
+
sep: ','
|
|
28
|
+
type: pandas.CSVDataset
|
|
29
|
+
|
|
30
|
+
processed_{{ cookiecutter.python_package }}_X_test:
|
|
31
|
+
filepath: data/03_preprocessed/processed_{{ cookiecutter.python_package }}_X_test.csv
|
|
32
|
+
save_args:
|
|
33
|
+
index: False
|
|
34
|
+
sep: ','
|
|
35
|
+
type: pandas.CSVDataset
|
|
36
|
+
|
|
37
|
+
processed_{{ cookiecutter.python_package }}_y_train:
|
|
38
|
+
filepath: data/03_preprocessed/processed_{{ cookiecutter.python_package }}_y_train.csv
|
|
39
|
+
save_args:
|
|
40
|
+
index: False
|
|
41
|
+
sep: ','
|
|
42
|
+
type: pandas.CSVDataset
|
|
43
|
+
|
|
44
|
+
processed_{{ cookiecutter.python_package }}_y_test:
|
|
45
|
+
filepath: data/03_preprocessed/processed_{{ cookiecutter.python_package }}_y_test.csv
|
|
46
|
+
save_args:
|
|
47
|
+
index: False
|
|
48
|
+
sep: ','
|
|
49
|
+
type: pandas.CSVDataset
|
|
50
|
+
|
|
51
|
+
event_ids_train:
|
|
52
|
+
filepath: data/03_preprocessed/event_ids_train.csv
|
|
53
|
+
save_args:
|
|
54
|
+
index: False
|
|
55
|
+
sep: ','
|
|
56
|
+
type: pandas.CSVDataset
|
|
57
|
+
|
|
58
|
+
event_ids_test:
|
|
59
|
+
filepath: data/03_preprocessed/event_ids_test.csv
|
|
60
|
+
save_args:
|
|
61
|
+
index: False
|
|
62
|
+
sep: ','
|
|
63
|
+
type: pandas.CSVDataset
|
|
64
|
+
|
|
65
|
+
scaler:
|
|
66
|
+
filepath: data/03_preprocessed/scaler.pkl
|
|
67
|
+
type: pickle.PickleDataset
|
|
68
|
+
|
|
69
|
+
train_model:
|
|
70
|
+
filepath: data/04_models/trained_model.pkl
|
|
71
|
+
type: pickle.PickleDataset
|
|
72
|
+
|
|
73
|
+
training_history:
|
|
74
|
+
type: matplotlib.MatplotlibDataset
|
|
75
|
+
filepath: data/07_reporting/training_history.png
|
|
76
|
+
save_args:
|
|
77
|
+
format: png
|
|
78
|
+
|
|
79
|
+
model_pred:
|
|
80
|
+
filepath: data/05_validation/model_pred.pkl
|
|
81
|
+
save_args:
|
|
82
|
+
index: False
|
|
83
|
+
sep: ','
|
|
84
|
+
type: pandas.CSVDataset
|
|
File without changes
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# This is a boilerplate parameters config generated for pipeline 'compile'
|
|
2
|
+
# using Kedro 1.0.0
|
|
3
|
+
#
|
|
4
|
+
# Documentation for this file format can be found in "Parameters"
|
|
5
|
+
# Link: https://docs.kedro.org/en/1.0.0/configuration/parameters.html
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
compile:
|
|
9
|
+
name: "munet"
|
|
10
|
+
ml_backend: "Keras"
|
|
11
|
+
compiler: "hls4ml"
|
|
12
|
+
mlflow_url: "https://mlflow-deploy-mflow.app.cern.ch"
|
|
13
|
+
compiler_config:
|
|
14
|
+
test: 123
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# This is a boilerplate parameters config generated for pipeline 'data_processing'
|
|
2
|
+
# using Kedro 1.0.0
|
|
3
|
+
#
|
|
4
|
+
# Documentation for this file format can be found in "Parameters"
|
|
5
|
+
# Link: https://docs.kedro.org/en/1.0.0/configuration/parameters.html
|
|
6
|
+
|
|
7
|
+
random_state: 42
|
|
8
|
+
test_size: 0.5
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# This is a boilerplate parameters config generated for pipeline 'model_training'
|
|
2
|
+
# using Kedro 1.0.0
|
|
3
|
+
#
|
|
4
|
+
# Documentation for this file format can be found in "Parameters"
|
|
5
|
+
# Link: https://docs.kedro.org/en/1.0.0/configuration/parameters.html
|
|
6
|
+
|
|
7
|
+
{{ cookiecutter.python_package }}_model:
|
|
8
|
+
hps:
|
|
9
|
+
name: "{{ cookiecutter.python_package }}"
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
{{ cookiecutter.python_package }}_meta_data:
|
|
2
|
+
filepath: data/01_raw/samples_dummy.json
|
|
3
|
+
sample_key: samples
|
|
4
|
+
type: {{ cookiecutter.python_package }}.datasets.meta_dataset.MetaDataset
|
|
5
|
+
|
|
6
|
+
{{ cookiecutter.python_package }}_data:
|
|
7
|
+
sample_info: data/01_raw/samples_dummy.json
|
|
8
|
+
sample_key: samples
|
|
9
|
+
type: {{ cookiecutter.python_package }}.datasets.{{ cookiecutter.python_package }}_dataset.{{ cookiecutter.project_name }}Dataset
|
|
10
|
+
|
|
11
|
+
{{ cookiecutter.python_package }}_meta_data_loaded:
|
|
12
|
+
filepath: data/02_loaded/{{ cookiecutter.python_package }}_meta_data.json
|
|
13
|
+
sample_key: samples
|
|
14
|
+
type: {{ cookiecutter.python_package }}.datasets.meta_dataset.MetaDataset
|
|
15
|
+
|
|
16
|
+
{{ cookiecutter.python_package }}_data_loaded:
|
|
17
|
+
filepath: data/02_loaded/{{ cookiecutter.python_package }}_data.csv
|
|
18
|
+
save_args:
|
|
19
|
+
index: False
|
|
20
|
+
sep: ','
|
|
21
|
+
type: pandas.CSVDataset
|
|
22
|
+
|
|
23
|
+
processed_{{ cookiecutter.python_package }}_X_train:
|
|
24
|
+
filepath: data/03_preprocessed/processed_{{ cookiecutter.python_package }}_X_train.csv
|
|
25
|
+
save_args:
|
|
26
|
+
index: False
|
|
27
|
+
sep: ','
|
|
28
|
+
type: pandas.CSVDataset
|
|
29
|
+
|
|
30
|
+
processed_{{ cookiecutter.python_package }}_X_test:
|
|
31
|
+
filepath: data/03_preprocessed/processed_{{ cookiecutter.python_package }}_X_test.csv
|
|
32
|
+
save_args:
|
|
33
|
+
index: False
|
|
34
|
+
sep: ','
|
|
35
|
+
type: pandas.CSVDataset
|
|
36
|
+
|
|
37
|
+
processed_{{ cookiecutter.python_package }}_y_train:
|
|
38
|
+
filepath: data/03_preprocessed/processed_{{ cookiecutter.python_package }}_y_train.csv
|
|
39
|
+
save_args:
|
|
40
|
+
index: False
|
|
41
|
+
sep: ','
|
|
42
|
+
type: pandas.CSVDataset
|
|
43
|
+
|
|
44
|
+
processed_{{ cookiecutter.python_package }}_y_test:
|
|
45
|
+
filepath: data/03_preprocessed/processed_{{ cookiecutter.python_package }}_y_test.csv
|
|
46
|
+
save_args:
|
|
47
|
+
index: False
|
|
48
|
+
sep: ','
|
|
49
|
+
type: pandas.CSVDataset
|
|
50
|
+
|
|
51
|
+
event_ids_train:
|
|
52
|
+
filepath: data/03_preprocessed/event_ids_train.csv
|
|
53
|
+
save_args:
|
|
54
|
+
index: False
|
|
55
|
+
sep: ','
|
|
56
|
+
type: pandas.CSVDataset
|
|
57
|
+
|
|
58
|
+
event_ids_test:
|
|
59
|
+
filepath: data/03_preprocessed/event_ids_test.csv
|
|
60
|
+
save_args:
|
|
61
|
+
index: False
|
|
62
|
+
sep: ','
|
|
63
|
+
type: pandas.CSVDataset
|
|
64
|
+
|
|
65
|
+
scaler:
|
|
66
|
+
filepath: data/03_preprocessed/scaler.pkl
|
|
67
|
+
type: pickle.PickleDataset
|
|
68
|
+
|
|
69
|
+
train_model:
|
|
70
|
+
filepath: data/04_models/trained_model.pkl
|
|
71
|
+
type: pickle.PickleDataset
|
|
72
|
+
|
|
73
|
+
training_history:
|
|
74
|
+
type: matplotlib.MatplotlibDataset
|
|
75
|
+
filepath: data/07_reporting/training_history.png
|
|
76
|
+
save_args:
|
|
77
|
+
format: png
|
|
78
|
+
|
|
79
|
+
model_pred:
|
|
80
|
+
filepath: data/05_validation/model_pred.pkl
|
|
81
|
+
save_args:
|
|
82
|
+
index: False
|
|
83
|
+
sep: ','
|
|
84
|
+
type: pandas.CSVDataset
|
|
File without changes
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# This is a boilerplate parameters config generated for pipeline 'compile'
|
|
2
|
+
# using Kedro 0.19.14.
|
|
3
|
+
#
|
|
4
|
+
# Documentation for this file format can be found in "Parameters"
|
|
5
|
+
# Link: https://docs.kedro.org/en/0.19.14/configuration/parameters.html
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
compile:
|
|
9
|
+
name: "munet"
|
|
10
|
+
ml_backend: "Keras"
|
|
11
|
+
compiler: "hls4ml"
|
|
12
|
+
mlflow_url: "https://mlflow-deploy-mflow.app.cern.ch"
|
|
13
|
+
compiler_config:
|
|
14
|
+
test: 123
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# This is a boilerplate parameters config generated for pipeline 'data_processing'
|
|
2
|
+
# using Kedro 0.19.14.
|
|
3
|
+
#
|
|
4
|
+
# Documentation for this file format can be found in "Parameters"
|
|
5
|
+
# Link: https://docs.kedro.org/en/0.19.14/configuration/parameters.html
|
|
6
|
+
|
|
7
|
+
random_state: 42
|
|
8
|
+
test_size: 0.5
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# This is a boilerplate parameters config generated for pipeline 'model_training'
|
|
2
|
+
# using Kedro 1.0.0
|
|
3
|
+
#
|
|
4
|
+
# Documentation for this file format can be found in "Parameters"
|
|
5
|
+
# Link: https://docs.kedro.org/en/1.0.0/configuration/parameters.html
|
|
6
|
+
|
|
7
|
+
{{ cookiecutter.python_package }}_model:
|
|
8
|
+
hps:
|
|
9
|
+
name: "{{ cookiecutter.python_package }}"
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# To enable this custom logging configuration, set KEDRO_LOGGING_CONFIG to the path of this file.
|
|
2
|
+
# More information available at https://docs.kedro.org/en/stable/logging/logging.html
|
|
3
|
+
version: 1
|
|
4
|
+
|
|
5
|
+
disable_existing_loggers: False
|
|
6
|
+
|
|
7
|
+
formatters:
|
|
8
|
+
simple:
|
|
9
|
+
format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
|
10
|
+
|
|
11
|
+
handlers:
|
|
12
|
+
console:
|
|
13
|
+
class: logging.StreamHandler
|
|
14
|
+
level: INFO
|
|
15
|
+
formatter: simple
|
|
16
|
+
stream: ext://sys.stdout
|
|
17
|
+
|
|
18
|
+
info_file_handler:
|
|
19
|
+
class: logging.handlers.RotatingFileHandler
|
|
20
|
+
level: INFO
|
|
21
|
+
formatter: simple
|
|
22
|
+
filename: info.log
|
|
23
|
+
maxBytes: 10485760 # 10MB
|
|
24
|
+
backupCount: 20
|
|
25
|
+
encoding: utf8
|
|
26
|
+
delay: True
|
|
27
|
+
|
|
28
|
+
rich:
|
|
29
|
+
class: kedro.logging.RichHandler
|
|
30
|
+
rich_tracebacks: True
|
|
31
|
+
# Advanced options for customisation.
|
|
32
|
+
# See https://docs.kedro.org/en/stable/logging/logging.html#project-side-logging-configuration
|
|
33
|
+
# tracebacks_show_locals: False
|
|
34
|
+
|
|
35
|
+
loggers:
|
|
36
|
+
kedro:
|
|
37
|
+
level: INFO
|
|
38
|
+
|
|
39
|
+
{{ cookiecutter.python_package }}:
|
|
40
|
+
level: INFO
|
|
41
|
+
|
|
42
|
+
root:
|
|
43
|
+
handlers: [rich, info_file_handler]
|
|
File without changes
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
{
|
|
2
|
+
"samples" : {
|
|
3
|
+
"testSample" : {
|
|
4
|
+
"all_file_path":"data/01_raw/test.root",
|
|
5
|
+
"path":"data/01_raw/test.root",
|
|
6
|
+
"file_pattern":["test.root"],
|
|
7
|
+
"DAS" : "Blabla",
|
|
8
|
+
"type" : "123",
|
|
9
|
+
"data" : false,
|
|
10
|
+
"era" : "phase1",
|
|
11
|
+
"run" : "run3",
|
|
12
|
+
"is_signal": true
|
|
13
|
+
}
|
|
14
|
+
}
|
|
15
|
+
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
{
|
|
2
|
+
"samples" : {
|
|
3
|
+
"muon24I" : {
|
|
4
|
+
"path": "data/01_raw/samples_dummy.json",
|
|
5
|
+
"folder":"data/01_raw/",
|
|
6
|
+
"file_pattern":["samples_dummy.json"],
|
|
7
|
+
"DAS" : "-",
|
|
8
|
+
"type" : "-",
|
|
9
|
+
"data" : true,
|
|
10
|
+
"era" : "phase1",
|
|
11
|
+
"run" : "run3",
|
|
12
|
+
"is_signal": true
|
|
13
|
+
},
|
|
14
|
+
"zb24I" : {
|
|
15
|
+
"path": "data/01_raw/samples_dummy.json",
|
|
16
|
+
"folder":"data/01_raw/",
|
|
17
|
+
"file_pattern":["samples_dummy.json"],
|
|
18
|
+
"DAS" : "-",
|
|
19
|
+
"type" : "-",
|
|
20
|
+
"data" : true,
|
|
21
|
+
"era" : "phase1",
|
|
22
|
+
"run" : "run3",
|
|
23
|
+
"is_signal": false
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
channels:
|
|
2
|
+
- conda-forge
|
|
3
|
+
- defaults
|
|
4
|
+
dependencies:
|
|
5
|
+
- python=3.11
|
|
6
|
+
- pip
|
|
7
|
+
- pip:
|
|
8
|
+
- kedro
|
|
9
|
+
- kedro-viz
|
|
10
|
+
- kedro-datasets
|
|
11
|
+
- matplotlib
|
|
12
|
+
- mplhep
|
|
13
|
+
- shap
|
|
14
|
+
- scikit-learn
|
|
15
|
+
- pandas
|
|
16
|
+
- dvc
|
|
17
|
+
- shap
|
|
18
|
+
- ruff
|
|
19
|
+
- uproot
|
|
20
|
+
- awkward
|
|
21
|
+
- triggerflow
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = [ "setuptools",]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
requires-python = ">=3.10"
|
|
7
|
+
name = "{{ cookiecutter.python_package }}"
|
|
8
|
+
readme = "README.md"
|
|
9
|
+
dynamic = [ "version",]
|
|
10
|
+
dependencies = [ "ipython>=8.10", "jupyterlab>=3.0", "notebook", "kedro~=1.0.0",]
|
|
11
|
+
|
|
12
|
+
[project.scripts]
|
|
13
|
+
{{ cookiecutter.project_name }} = "{{ cookiecutter.python_package }}.__main__:main"
|
|
14
|
+
|
|
15
|
+
[project.optional-dependencies]
|
|
16
|
+
dev = [ "pytest-cov~=3.0", "pytest-mock>=1.7.1, <2.0", "pytest~=7.2", "ruff~=0.1.8",]
|
|
17
|
+
|
|
18
|
+
[tool.kedro]
|
|
19
|
+
package_name = "{{ cookiecutter.python_package }}"
|
|
20
|
+
project_name = "{{ cookiecutter.project_name }}"
|
|
21
|
+
kedro_init_version = "1.0.0"
|
|
22
|
+
tools = "['Linting', 'Testing', 'Custom Logging', 'Data Structure']"
|
|
23
|
+
example_pipeline = "False"
|
|
24
|
+
source_dir = "src"
|
|
25
|
+
|
|
26
|
+
[tool.ruff]
|
|
27
|
+
line-length = 88
|
|
28
|
+
show-fixes = true
|
|
29
|
+
select = [ "F", "W", "E", "I", "UP", "PL", "T201",]
|
|
30
|
+
ignore = [ "E501",]
|
|
31
|
+
|
|
32
|
+
[project.entry-points."kedro.hooks"]
|
|
33
|
+
|
|
34
|
+
[tool.pytest.ini_options]
|
|
35
|
+
addopts = "--cov-report term-missing --cov src/{{ cookiecutter.python_package }} -ra"
|
|
36
|
+
|
|
37
|
+
[tool.coverage.report]
|
|
38
|
+
fail_under = 0
|
|
39
|
+
show_missing = true
|
|
40
|
+
exclude_lines = [ "pragma: no cover", "raise NotImplementedError",]
|
|
41
|
+
|
|
42
|
+
[tool.ruff.format]
|
|
43
|
+
docstring-code-format = true
|
|
44
|
+
|
|
45
|
+
[tool.setuptools.dynamic.version]
|
|
46
|
+
attr = "{{ cookiecutter.python_package }}.__version__"
|
|
47
|
+
|
|
48
|
+
[tool.setuptools.packages.find]
|
|
49
|
+
where = [ "src",]
|
|
50
|
+
namespaces = false
|
triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/__main__.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""{{ cookiecutter.project_name }} file for ensuring the package is executable
|
|
2
|
+
as `{{ cookiecutter.project_name }}` and `python -m {{ cookiecutter.python_package }}`
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import sys
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from kedro.framework.cli.utils import find_run_command
|
|
10
|
+
from kedro.framework.project import configure_project
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def main(*args, **kwargs) -> Any:
    """Configure the Kedro project and invoke its ``run`` command.

    The package name is taken from the directory that contains this file.
    All positional and keyword arguments are forwarded to the run command;
    the ``standalone_mode`` keyword is always overwritten here.

    Returns:
        Whatever the run command returns.
    """
    pkg = Path(__file__).parent.name
    configure_project(pkg)

    # ``sys.ps1`` only exists in an interactive interpreter session; in that
    # case standalone mode is switched off, otherwise it is switched on.
    kwargs["standalone_mode"] = not hasattr(sys, "ps1")

    return find_run_command(pkg)(*args, **kwargs)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
if __name__ == "__main__":
|
|
25
|
+
main()
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
from kedro.io import AbstractDataset
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class AnyObject(AbstractDataset):
    """
    Placeholder dataset which can be used for passing "Any" object.

    It takes no configuration and keeps no state: `_load` yields ``None``
    and `_save` hands the received object straight back.
    """

    def __init__(self):
        """Create the dataset; there is nothing to configure."""

    def _load(self) -> None:
        """Return ``None``; this dataset has nothing to read."""
        return None

    def _save(self, data: Any) -> Any:
        """Return ``data`` unchanged without writing it anywhere."""
        return data

    def _describe(self) -> dict:
        """Return an empty description, since there are no parameters."""
        return dict()
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import logging, uproot, json, os
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import numpy as np
|
|
4
|
+
from abc import abstractmethod
|
|
5
|
+
from fnmatch import filter as fnmatch_filter
|
|
6
|
+
from kedro.io import AbstractDataset
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class BaseDataset(AbstractDataset):
    """
    Abstract Base Class for loading data from ROOT files.

    Users must inherit from this class and implement the abstract methods
    (`get_branches_to_keep`, `get_cut` and `convert_to_pandas`).
    The core processing logic in `_load` is fixed and is not meant to be
    overridden.

    The sample description is read from a JSON file. Every entry found under
    `sample_key` must provide:

    - ``"folder"``: directory whose content is listed to find input files,
    - ``"file_pattern"``: list of wildcard patterns matched against the file
      names in that folder,
    - ``"is_signal"``: truthy for signal samples, falsy for background.
    """

    # Path that `_load` treats as a placeholder: instead of opening it with
    # uproot, randomly generated rows are produced for it.
    _DUMMY_FILE = "data/01_raw/samples_dummy.json"
    # Number of rows generated for the placeholder file.
    _DUMMY_ROWS = 100

    def __init__(self, sample_info: str, sample_key: str):
        """
        Read the sample description.

        Args:
            sample_info: Path to the JSON file describing the samples.
            sample_key: Top-level key of that JSON file holding the samples.

        Raises:
            OSError: If the JSON file cannot be opened.
            KeyError: If `sample_key` is missing from the JSON file.
        """
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(sample_info, "r", encoding="utf-8") as f:
            data = json.load(f)
        self._sample_info = data[sample_key]
        self._sample_key = sample_key

        # get logger for reporting
        self.logger = logging.getLogger(__name__)
        self.logger.info(f"Initializing dataset: {self.__class__.__name__}")

    @abstractmethod
    def get_branches_to_keep(self) -> list[str]:
        """
        USER MUST IMPLEMENT: Return a list of branch names or patterns (with wildcards)
        to keep from the ROOT file.

        Example:
            return ["Jet_*", "PuppiMET_pt", "nJet"]
        """
        pass

    @abstractmethod
    def get_cut(self) -> str | None:
        """
        USER MUST IMPLEMENT: Return a string representing the cuts to apply to the data.
        """
        pass

    @abstractmethod
    def convert_to_pandas(self, data: dict) -> pd.DataFrame:
        """
        USER MUST IMPLEMENT: Convert the loaded data from a dictionary format to a pandas DataFrame.
        """
        pass

    def get_tree_name(self) -> str:
        """Return the name of the tree read from each ROOT file."""
        return "Events"

    def _resolve_branches(self, all_branches: list) -> list[str]:
        """
        Internal method to resolve wildcard patterns.

        Each pattern from `get_branches_to_keep` is matched against
        `all_branches`; a warning is logged for patterns without a match.

        Returns:
            The matched branch names, de-duplicated and sorted.
        """
        selected = []
        for pattern in self.get_branches_to_keep():
            matched = fnmatch_filter(all_branches, pattern)
            if not matched:
                self.logger.warning(f"Pattern '{pattern}' did not match any branches.")
            selected.extend(matched)
        return sorted(set(selected))

    def _list_sample_files(self, sample: dict) -> list[str]:
        """Return the paths in the sample's folder that match its file patterns."""
        folder = sample["folder"]
        names = os.listdir(folder)
        paths = []
        for file_pattern in sample["file_pattern"]:
            # A file matching several patterns is listed once per pattern.
            for name in fnmatch_filter(names, file_pattern):
                paths.append(os.path.join(folder, name))
        return paths

    def _make_dummy_frame(self, is_signal: bool) -> pd.DataFrame:
        """Build a frame of random rows that stands in for the placeholder file."""
        n = self._DUMMY_ROWS
        # generate dummy features, one column per requested branch/pattern
        dummy_data = {}
        for branch in self.get_branches_to_keep():
            dummy_data[branch] = np.random.randn(n)
        dummy_data["is_signal"] = np.ones(n) if is_signal else np.zeros(n)

        cur_df = pd.DataFrame(dummy_data)

        # generate a binary target (0/1)
        cur_df["y"] = np.random.choice([0, 1], size=n)
        return cur_df

    def _read_root_file(self, root_file: str, is_signal: bool) -> pd.DataFrame | None:
        """Load one ROOT file into a frame, or return None if no branch matches."""
        with uproot.open(f"{root_file}") as f:
            tree = f[self.get_tree_name()]
            branches_to_load = self._resolve_branches(tree.keys())

            if not branches_to_load:
                self.logger.warning(
                    f"No valid branches to load for {root_file}. Skipping."
                )
                return None

            data = tree.arrays(branches_to_load, cut=self.get_cut())
            cur_df = self.convert_to_pandas(data)

            # set background or signal (a scalar is broadcast to every row)
            cur_df["is_signal"] = 1 if is_signal else 0
            return cur_df

    def _load(self) -> pd.DataFrame:
        """
        CORE LOGIC (NOT OVERRIDABLE): Loads and processes every file of every sample.

        For each sample the folder is listed and filtered by the sample's
        file patterns. Each matching file is turned into a DataFrame with an
        added ``is_signal`` column, and all frames are concatenated.

        Returns:
            The concatenation of all per-file frames (original row indices
            are kept), or an empty DataFrame if nothing was loaded.
        """
        samples = list(self._sample_info.values())

        # Resolve every sample's inputs up front, so a broken sample entry
        # fails before any file is processed.
        all_root_files = [self._list_sample_files(sample) for sample in samples]
        is_signals = [sample["is_signal"] for sample in samples]

        # Frames are collected and concatenated once at the end; growing a
        # DataFrame with concat inside the loop re-copies all earlier rows
        # for every file.
        frames = []
        self.logger.info("Processing files")
        for root_files, is_signal in zip(all_root_files, is_signals):
            self.logger.info(f"Processing files: {root_files}")
            for root_file in root_files:
                if root_file == self._DUMMY_FILE:
                    cur_df = self._make_dummy_frame(is_signal)
                else:
                    cur_df = self._read_root_file(root_file, is_signal)

                if cur_df is not None:
                    frames.append(cur_df)

        if not frames:
            # pd.concat raises on an empty list; keep the empty-frame result.
            return pd.DataFrame()
        return pd.concat(frames)

    def _save(self, data: pd.DataFrame) -> pd.DataFrame:
        """Return `data` unchanged; nothing is written."""
        return data

    def _describe(self) -> dict:
        """Return the loaded sample description and the key it was read from."""
        return {"output_sample_info": self._sample_info, "sample_key": self._sample_key}
|