triggerflow 0.1.12__py3-none-any.whl → 0.2__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (95)
  1. trigger_dataset/__init__.py +0 -0
  2. trigger_dataset/core.py +88 -0
  3. trigger_loader/__init__.py +0 -0
  4. trigger_loader/cluster_manager.py +107 -0
  5. trigger_loader/loader.py +95 -0
  6. trigger_loader/processor.py +211 -0
  7. triggerflow/cli.py +122 -0
  8. triggerflow/core.py +118 -114
  9. triggerflow/mlflow_wrapper.py +54 -49
  10. triggerflow/starter/.gitignore +143 -0
  11. triggerflow/starter/README.md +0 -0
  12. triggerflow/starter/cookiecutter.json +5 -0
  13. triggerflow/starter/prompts.yml +9 -0
  14. triggerflow/starter/{{ cookiecutter.repo_name }}/.dvcignore +3 -0
  15. triggerflow/starter/{{ cookiecutter.repo_name }}/.gitignore +143 -0
  16. triggerflow/starter/{{ cookiecutter.repo_name }}/.gitlab-ci.yml +56 -0
  17. triggerflow/starter/{{ cookiecutter.repo_name }}/README.md +29 -0
  18. triggerflow/starter/{{ cookiecutter.repo_name }}/conf/README.md +26 -0
  19. triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/catalog.yml +84 -0
  20. triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/parameters.yml +0 -0
  21. triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/parameters_compile.yml +14 -0
  22. triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/parameters_data_processing.yml +8 -0
  23. triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/parameters_load_data.yml +5 -0
  24. triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/parameters_model_training.yml +9 -0
  25. triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/parameters_model_validation.yml +5 -0
  26. triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/catalog.yml +84 -0
  27. triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/parameters.yml +0 -0
  28. triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/parameters_compile.yml +14 -0
  29. triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/parameters_data_processing.yml +8 -0
  30. triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/parameters_load_data.yml +5 -0
  31. triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/parameters_model_training.yml +9 -0
  32. triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/parameters_model_validation.yml +5 -0
  33. triggerflow/starter/{{ cookiecutter.repo_name }}/conf/logging.yml +43 -0
  34. triggerflow/starter/{{ cookiecutter.repo_name }}/data/01_raw/.gitkeep +0 -0
  35. triggerflow/starter/{{ cookiecutter.repo_name }}/data/01_raw/samples.json +15 -0
  36. triggerflow/starter/{{ cookiecutter.repo_name }}/data/01_raw/samples_dummy.json +26 -0
  37. triggerflow/starter/{{ cookiecutter.repo_name }}/data/02_loaded/.gitkeep +0 -0
  38. triggerflow/starter/{{ cookiecutter.repo_name }}/data/03_preprocessed/.gitkeep +0 -0
  39. triggerflow/starter/{{ cookiecutter.repo_name }}/data/04_models/.gitkeep +0 -0
  40. triggerflow/starter/{{ cookiecutter.repo_name }}/data/05_validation/.gitkeep +0 -0
  41. triggerflow/starter/{{ cookiecutter.repo_name }}/data/06_compile/.gitkeep +0 -0
  42. triggerflow/starter/{{ cookiecutter.repo_name }}/data/07_reporting/.gitkeep +0 -0
  43. triggerflow/starter/{{ cookiecutter.repo_name }}/dvc.yaml +7 -0
  44. triggerflow/starter/{{ cookiecutter.repo_name }}/environment.yml +21 -0
  45. triggerflow/starter/{{ cookiecutter.repo_name }}/pyproject.toml +50 -0
  46. triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/__init__.py +3 -0
  47. triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/__main__.py +25 -0
  48. triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/datasets/any_object.py +20 -0
  49. triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/datasets/base_dataset.py +137 -0
  50. triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/datasets/meta_dataset.py +88 -0
  51. triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/datasets/{{ cookiecutter.python_package }}_dataset.py +35 -0
  52. triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/models/__init__.py +0 -0
  53. triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/models/base_model.py +155 -0
  54. triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/models/{{ cookiecutter.python_package }}_model.py +16 -0
  55. triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipeline_registry.py +17 -0
  56. triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/compile/__init__.py +10 -0
  57. triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/compile/nodes.py +50 -0
  58. triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/compile/pipeline.py +10 -0
  59. triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_processing/__init__.py +10 -0
  60. triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_processing/nodes.py +40 -0
  61. triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_processing/pipeline.py +28 -0
  62. triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/load_data/__init__.py +10 -0
  63. triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/load_data/nodes.py +12 -0
  64. triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/load_data/pipeline.py +20 -0
  65. triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/model_training/__init__.py +10 -0
  66. triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/model_training/nodes.py +31 -0
  67. triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/model_training/pipeline.py +24 -0
  68. triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/model_validation/__init__.py +10 -0
  69. triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/model_validation/nodes.py +29 -0
  70. triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/model_validation/pipeline.py +24 -0
  71. triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/settings.py +46 -0
  72. triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/utils/__init__.py +0 -0
  73. triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/utils/metric.py +4 -0
  74. triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/utils/plotting.py +598 -0
  75. triggerflow/starter/{{ cookiecutter.repo_name }}/tests/__init__.py +0 -0
  76. triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/__init__.py +0 -0
  77. triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/compile/__init__.py +0 -0
  78. triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/compile/test_pipeline.py +9 -0
  79. triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/data_processing/__init__.py +0 -0
  80. triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/data_processing/test_pipeline.py +9 -0
  81. triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/load_data/__init__.py +0 -0
  82. triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/load_data/test_pipeline.py +9 -0
  83. triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/model_training/__init__.py +0 -0
  84. triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/model_training/test_pipeline.py +9 -0
  85. triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/model_validation/__init__.py +0 -0
  86. triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/model_validation/test_pipeline.py +9 -0
  87. triggerflow/starter/{{ cookiecutter.repo_name }}/tests/test_run.py +27 -0
  88. triggerflow-0.2.dist-info/METADATA +97 -0
  89. triggerflow-0.2.dist-info/RECORD +97 -0
  90. triggerflow-0.2.dist-info/entry_points.txt +2 -0
  91. triggerflow-0.2.dist-info/top_level.txt +3 -0
  92. triggerflow-0.1.12.dist-info/METADATA +0 -61
  93. triggerflow-0.1.12.dist-info/RECORD +0 -11
  94. triggerflow-0.1.12.dist-info/top_level.txt +0 -1
  95. {triggerflow-0.1.12.dist-info → triggerflow-0.2.dist-info}/WHEEL +0 -0
@@ -0,0 +1,84 @@
+ {{ cookiecutter.python_package }}_meta_data:
+   filepath: data/01_raw/samples_dummy.json
+   sample_key: samples
+   type: {{ cookiecutter.python_package }}.datasets.meta_dataset.MetaDataset
+
+ {{ cookiecutter.python_package }}_data:
+   sample_info: data/01_raw/samples_dummy.json
+   sample_key: samples
+   type: {{ cookiecutter.python_package }}.datasets.{{ cookiecutter.python_package }}_dataset.{{ cookiecutter.project_name }}Dataset
+
+ {{ cookiecutter.python_package }}_meta_data_loaded:
+   filepath: data/02_loaded/{{ cookiecutter.python_package }}_meta_data.json
+   sample_key: samples
+   type: {{ cookiecutter.python_package }}.datasets.meta_dataset.MetaDataset
+
+ {{ cookiecutter.python_package }}_data_loaded:
+   filepath: data/02_loaded/{{ cookiecutter.python_package }}_data.csv
+   save_args:
+     index: False
+     sep: ','
+   type: pandas.CSVDataset
+
+ processed_{{ cookiecutter.python_package }}_X_train:
+   filepath: data/03_preprocessed/processed_{{ cookiecutter.python_package }}_X_train.csv
+   save_args:
+     index: False
+     sep: ','
+   type: pandas.CSVDataset
+
+ processed_{{ cookiecutter.python_package }}_X_test:
+   filepath: data/03_preprocessed/processed_{{ cookiecutter.python_package }}_X_test.csv
+   save_args:
+     index: False
+     sep: ','
+   type: pandas.CSVDataset
+
+ processed_{{ cookiecutter.python_package }}_y_train:
+   filepath: data/03_preprocessed/processed_{{ cookiecutter.python_package }}_y_train.csv
+   save_args:
+     index: False
+     sep: ','
+   type: pandas.CSVDataset
+
+ processed_{{ cookiecutter.python_package }}_y_test:
+   filepath: data/03_preprocessed/processed_{{ cookiecutter.python_package }}_y_test.csv
+   save_args:
+     index: False
+     sep: ','
+   type: pandas.CSVDataset
+
+ event_ids_train:
+   filepath: data/03_preprocessed/event_ids_train.csv
+   save_args:
+     index: False
+     sep: ','
+   type: pandas.CSVDataset
+
+ event_ids_test:
+   filepath: data/03_preprocessed/event_ids_test.csv
+   save_args:
+     index: False
+     sep: ','
+   type: pandas.CSVDataset
+
+ scaler:
+   filepath: data/03_preprocessed/scaler.pkl
+   type: pickle.PickleDataset
+
+ train_model:
+   filepath: data/04_models/trained_model.pkl
+   type: pickle.PickleDataset
+
+ training_history:
+   type: matplotlib.MatplotlibDataset
+   filepath: data/07_reporting/training_history.png
+   save_args:
+     format: png
+
+ model_pred:
+   filepath: data/05_validation/model_pred.pkl
+   save_args:
+     index: False
+     sep: ','
+   type: pandas.CSVDataset
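
Each catalog entry above maps onto a dataset class that Kedro instantiates at runtime. A minimal sketch (not part of the diff) of what the event_ids_train entry resolves to, assuming kedro-datasets (listed in environment.yml below) provides pandas.CSVDataset:

    # Hypothetical programmatic equivalent of the `event_ids_train` entry;
    # Kedro builds this object from the YAML at runtime.
    from kedro_datasets.pandas import CSVDataset

    event_ids_train = CSVDataset(
        filepath="data/03_preprocessed/event_ids_train.csv",
        save_args={"index": False, "sep": ","},  # same save_args as in the YAML
    )
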
@@ -0,0 +1,14 @@
+ # This is a boilerplate parameters config generated for pipeline 'compile'
+ # using Kedro 1.0.0
+ #
+ # Documentation for this file format can be found in "Parameters"
+ # Link: https://docs.kedro.org/en/1.0.0/configuration/parameters.html
+
+
+ compile:
+   name: "munet"
+   ml_backend: "Keras"
+   compiler: "hls4ml"
+   mlflow_url: "https://mlflow-deploy-mflow.app.cern.ch"
+   compiler_config:
+     test: 123
@@ -0,0 +1,8 @@
+ # This is a boilerplate parameters config generated for pipeline 'data_processing'
+ # using Kedro 1.0.0
+ #
+ # Documentation for this file format can be found in "Parameters"
+ # Link: https://docs.kedro.org/en/1.0.0/configuration/parameters.html
+
+ random_state: 42
+ test_size: 0.5
@@ -0,0 +1,5 @@
+ # This is a boilerplate parameters config generated for pipeline 'load_data'
+ # using Kedro 1.0.0
+ #
+ # Documentation for this file format can be found in "Parameters"
+ # Link: https://docs.kedro.org/en/1.0.0/configuration/parameters.html
@@ -0,0 +1,9 @@
+ # This is a boilerplate parameters config generated for pipeline 'model_training'
+ # using Kedro 1.0.0
+ #
+ # Documentation for this file format can be found in "Parameters"
+ # Link: https://docs.kedro.org/en/1.0.0/configuration/parameters.html
+
+ {{ cookiecutter.python_package }}_model:
+   hps:
+     name: "{{ cookiecutter.python_package }}"
@@ -0,0 +1,5 @@
+ # This is a boilerplate parameters config generated for pipeline 'model_validation'
+ # using Kedro 1.0.0
+ #
+ # Documentation for this file format can be found in "Parameters"
+ # Link: https://docs.kedro.org/en/1.0.0/configuration/parameters.html
@@ -0,0 +1,84 @@
+ {{ cookiecutter.python_package }}_meta_data:
+   filepath: data/01_raw/samples_dummy.json
+   sample_key: samples
+   type: {{ cookiecutter.python_package }}.datasets.meta_dataset.MetaDataset
+
+ {{ cookiecutter.python_package }}_data:
+   sample_info: data/01_raw/samples_dummy.json
+   sample_key: samples
+   type: {{ cookiecutter.python_package }}.datasets.{{ cookiecutter.python_package }}_dataset.{{ cookiecutter.project_name }}Dataset
+
+ {{ cookiecutter.python_package }}_meta_data_loaded:
+   filepath: data/02_loaded/{{ cookiecutter.python_package }}_meta_data.json
+   sample_key: samples
+   type: {{ cookiecutter.python_package }}.datasets.meta_dataset.MetaDataset
+
+ {{ cookiecutter.python_package }}_data_loaded:
+   filepath: data/02_loaded/{{ cookiecutter.python_package }}_data.csv
+   save_args:
+     index: False
+     sep: ','
+   type: pandas.CSVDataset
+
+ processed_{{ cookiecutter.python_package }}_X_train:
+   filepath: data/03_preprocessed/processed_{{ cookiecutter.python_package }}_X_train.csv
+   save_args:
+     index: False
+     sep: ','
+   type: pandas.CSVDataset
+
+ processed_{{ cookiecutter.python_package }}_X_test:
+   filepath: data/03_preprocessed/processed_{{ cookiecutter.python_package }}_X_test.csv
+   save_args:
+     index: False
+     sep: ','
+   type: pandas.CSVDataset
+
+ processed_{{ cookiecutter.python_package }}_y_train:
+   filepath: data/03_preprocessed/processed_{{ cookiecutter.python_package }}_y_train.csv
+   save_args:
+     index: False
+     sep: ','
+   type: pandas.CSVDataset
+
+ processed_{{ cookiecutter.python_package }}_y_test:
+   filepath: data/03_preprocessed/processed_{{ cookiecutter.python_package }}_y_test.csv
+   save_args:
+     index: False
+     sep: ','
+   type: pandas.CSVDataset
+
+ event_ids_train:
+   filepath: data/03_preprocessed/event_ids_train.csv
+   save_args:
+     index: False
+     sep: ','
+   type: pandas.CSVDataset
+
+ event_ids_test:
+   filepath: data/03_preprocessed/event_ids_test.csv
+   save_args:
+     index: False
+     sep: ','
+   type: pandas.CSVDataset
+
+ scaler:
+   filepath: data/03_preprocessed/scaler.pkl
+   type: pickle.PickleDataset
+
+ train_model:
+   filepath: data/04_models/trained_model.pkl
+   type: pickle.PickleDataset
+
+ training_history:
+   type: matplotlib.MatplotlibDataset
+   filepath: data/07_reporting/training_history.png
+   save_args:
+     format: png
+
+ model_pred:
+   filepath: data/05_validation/model_pred.pkl
+   save_args:
+     index: False
+     sep: ','
+   type: pandas.CSVDataset
@@ -0,0 +1,14 @@
+ # This is a boilerplate parameters config generated for pipeline 'compile'
+ # using Kedro 0.19.14.
+ #
+ # Documentation for this file format can be found in "Parameters"
+ # Link: https://docs.kedro.org/en/0.19.14/configuration/parameters.html
+
+
+ compile:
+   name: "munet"
+   ml_backend: "Keras"
+   compiler: "hls4ml"
+   mlflow_url: "https://mlflow-deploy-mflow.app.cern.ch"
+   compiler_config:
+     test: 123
@@ -0,0 +1,8 @@
+ # This is a boilerplate parameters config generated for pipeline 'data_processing'
+ # using Kedro 0.19.14.
+ #
+ # Documentation for this file format can be found in "Parameters"
+ # Link: https://docs.kedro.org/en/0.19.14/configuration/parameters.html
+
+ random_state: 42
+ test_size: 0.5
@@ -0,0 +1,5 @@
+ # This is a boilerplate parameters config generated for pipeline 'load_data'
+ # using Kedro 0.19.14.
+ #
+ # Documentation for this file format can be found in "Parameters"
+ # Link: https://docs.kedro.org/en/0.19.14/configuration/parameters.html
@@ -0,0 +1,9 @@
+ # This is a boilerplate parameters config generated for pipeline 'model_training'
+ # using Kedro 1.0.0
+ #
+ # Documentation for this file format can be found in "Parameters"
+ # Link: https://docs.kedro.org/en/1.0.0/configuration/parameters.html
+
+ {{ cookiecutter.python_package }}_model:
+   hps:
+     name: "{{ cookiecutter.python_package }}"
@@ -0,0 +1,5 @@
+ # This is a boilerplate parameters config generated for pipeline 'model_validation'
+ # using Kedro 1.0.0
+ #
+ # Documentation for this file format can be found in "Parameters"
+ # Link: https://docs.kedro.org/en/0.19.14/configuration/parameters.html
@@ -0,0 +1,43 @@
+ # To enable this custom logging configuration, set KEDRO_LOGGING_CONFIG to the path of this file.
+ # More information available at https://docs.kedro.org/en/stable/logging/logging.html
+ version: 1
+
+ disable_existing_loggers: False
+
+ formatters:
+   simple:
+     format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+
+ handlers:
+   console:
+     class: logging.StreamHandler
+     level: INFO
+     formatter: simple
+     stream: ext://sys.stdout
+
+   info_file_handler:
+     class: logging.handlers.RotatingFileHandler
+     level: INFO
+     formatter: simple
+     filename: info.log
+     maxBytes: 10485760 # 10MB
+     backupCount: 20
+     encoding: utf8
+     delay: True
+
+   rich:
+     class: kedro.logging.RichHandler
+     rich_tracebacks: True
+     # Advanced options for customisation.
+     # See https://docs.kedro.org/en/stable/logging/logging.html#project-side-logging-configuration
+     # tracebacks_show_locals: False
+
+ loggers:
+   kedro:
+     level: INFO
+
+   {{ cookiecutter.python_package }}:
+     level: INFO
+
+ root:
+   handlers: [rich, info_file_handler]
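
As the first comment in this file says, Kedro picks the config up through the KEDRO_LOGGING_CONFIG environment variable. A minimal sketch, assuming the file lives at conf/logging.yml in a rendered project:

    # Hypothetical: point Kedro at the custom logging config before a run,
    # equivalent to `export KEDRO_LOGGING_CONFIG=conf/logging.yml` in a shell.
    import os

    os.environ["KEDRO_LOGGING_CONFIG"] = "conf/logging.yml"
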
@@ -0,0 +1,15 @@
+ {
+   "samples" : {
+     "testSample" : {
+       "all_file_path":"data/01_raw/test.root",
+       "path":"data/01_raw/test.root",
+       "file_pattern":["test.root"],
+       "DAS" : "Blabla",
+       "type" : "123",
+       "data" : false,
+       "era" : "phase1",
+       "run" : "run3",
+       "is_signal": true
+     }
+   }
+ }
@@ -0,0 +1,26 @@
+ {
+   "samples" : {
+     "muon24I" : {
+       "path": "data/01_raw/samples_dummy.json",
+       "folder":"data/01_raw/",
+       "file_pattern":["samples_dummy.json"],
+       "DAS" : "-",
+       "type" : "-",
+       "data" : true,
+       "era" : "phase1",
+       "run" : "run3",
+       "is_signal": true
+     },
+     "zb24I" : {
+       "path": "data/01_raw/samples_dummy.json",
+       "folder":"data/01_raw/",
+       "file_pattern":["samples_dummy.json"],
+       "DAS" : "-",
+       "type" : "-",
+       "data" : true,
+       "era" : "phase1",
+       "run" : "run3",
+       "is_signal": false
+     }
+   }
+ }
@@ -0,0 +1,7 @@
+ stages:
+   process_samples:
+     cmd:
+     deps:
+       -
+     outs:
+       -
@@ -0,0 +1,21 @@
+ channels:
+   - conda-forge
+   - defaults
+ dependencies:
+   - python=3.11
+   - pip
+   - pip:
+       - kedro
+       - kedro-viz
+       - kedro-datasets
+       - matplotlib
+       - mplhep
+       - shap
+       - scikit-learn
+       - pandas
+       - dvc
+       - shap
+       - ruff
+       - uproot
+       - awkward
+       - triggerflow
@@ -0,0 +1,50 @@
+ [build-system]
+ requires = [ "setuptools",]
+ build-backend = "setuptools.build_meta"
+
+ [project]
+ requires-python = ">=3.10"
+ name = "{{ cookiecutter.python_package }}"
+ readme = "README.md"
+ dynamic = [ "version",]
+ dependencies = [ "ipython>=8.10", "jupyterlab>=3.0", "notebook", "kedro~=1.0.0",]
+
+ [project.scripts]
+ {{ cookiecutter.project_name }} = "{{ cookiecutter.python_package }}.__main__:main"
+
+ [project.optional-dependencies]
+ dev = [ "pytest-cov~=3.0", "pytest-mock>=1.7.1, <2.0", "pytest~=7.2", "ruff~=0.1.8",]
+
+ [tool.kedro]
+ package_name = "{{ cookiecutter.python_package }}"
+ project_name = "{{ cookiecutter.project_name }}"
+ kedro_init_version = "1.0.0"
+ tools = "['Linting', 'Testing', 'Custom Logging', 'Data Structure']"
+ example_pipeline = "False"
+ source_dir = "src"
+
+ [tool.ruff]
+ line-length = 88
+ show-fixes = true
+ select = [ "F", "W", "E", "I", "UP", "PL", "T201",]
+ ignore = [ "E501",]
+
+ [project.entry-points."kedro.hooks"]
+
+ [tool.pytest.ini_options]
+ addopts = "--cov-report term-missing --cov src/{{ cookiecutter.python_package }} -ra"
+
+ [tool.coverage.report]
+ fail_under = 0
+ show_missing = true
+ exclude_lines = [ "pragma: no cover", "raise NotImplementedError",]
+
+ [tool.ruff.format]
+ docstring-code-format = true
+
+ [tool.setuptools.dynamic.version]
+ attr = "{{ cookiecutter.python_package }}.__version__"
+
+ [tool.setuptools.packages.find]
+ where = [ "src",]
+ namespaces = false
@@ -0,0 +1,3 @@
+ """{{ cookiecutter.project_name }}"""
+
+ __version__ = "0.1"
@@ -0,0 +1,25 @@
+ """{{ cookiecutter.project_name }} file for ensuring the package is executable
+ as `{{ cookiecutter.project_name }}` and `python -m {{ cookiecutter.python_package }}`
+ """
+
+ import sys
+ from pathlib import Path
+ from typing import Any
+
+ from kedro.framework.cli.utils import find_run_command
+ from kedro.framework.project import configure_project
+
+
+ def main(*args, **kwargs) -> Any:
+     package_name = Path(__file__).parent.name
+     configure_project(package_name)
+
+     interactive = hasattr(sys, "ps1")
+     kwargs["standalone_mode"] = not interactive
+
+     run = find_run_command(package_name)
+     return run(*args, **kwargs)
+
+
+ if __name__ == "__main__":
+     main()
@@ -0,0 +1,20 @@
+ from kedro.io import AbstractDataset
+ from typing import Any
+
+
+ class AnyObject(AbstractDataset):
+     """
+     Abstract class which can be used for passing "Any" object
+     """
+
+     def __init__(self):
+         pass
+
+     def _load(self) -> None:
+         pass
+
+     def _save(self, data: Any) -> Any:
+         return data
+
+     def _describe(self) -> dict:
+         return {}
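
As defined above, AnyObject is a pass-through dataset: _save returns the object it is given unchanged and _load returns nothing. A minimal sketch of that behavior through Kedro's public save/load API, assuming the starter was rendered with python_package = "my_project" (name illustrative):

    # Hypothetical direct use of the pass-through dataset.
    from my_project.datasets.any_object import AnyObject

    ds = AnyObject()
    ds.save({"weights": [0.1, 0.2]})  # no-op: nothing is written to disk
    assert ds.load() is None          # _load returns nothing
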
@@ -0,0 +1,137 @@
+ import logging, uproot, json, os
+ import pandas as pd
+ import numpy as np
+ from abc import abstractmethod
+ from fnmatch import filter as fnmatch_filter
+ from kedro.io import AbstractDataset
+
+
+ class BaseDataset(AbstractDataset):
+     """
+     Abstract Base Class for loading data from ROOT files.
+
+     Users must inherit from this class and implement the abstract methods.
+     The core processing logic in `_load` is fixed and cannot be overridden.
+     """
+
+     def __init__(self, sample_info: str, sample_key: str):
+         with open(sample_info, "r") as f:
+             data = json.load(f)
+         self._sample_info = data[sample_key]
+         self._sample_key = sample_key
+
+         # get logger for reporting
+         self.logger = logging.getLogger(__name__)
+         self.logger.info(f"Initializing dataset: {self.__class__.__name__}")
+
+     @abstractmethod
+     def get_branches_to_keep(self) -> list[str]:
+         """
+         USER MUST IMPLEMENT: Return a list of branch names or patterns (with wildcards)
+         to keep from the ROOT file.
+
+         Example:
+             return ["Jet_*", "PuppiMET_pt", "nJet"]
+         """
+         pass
+
+     @abstractmethod
+     def get_cut(self) -> str | None:
+         """
+         USER MUST IMPLEMENT: Return a string representing the cuts to apply to the data.
+         """
+         pass
+
+     @abstractmethod
+     def convert_to_pandas(self, data: dict) -> pd.DataFrame:
+         """
+         USER MUST IMPLEMENT: Convert the loaded data from a dictionary format to a pandas DataFrame.
+         """
+         pass
+
+     def get_tree_name(self) -> str:
+         return "Events"
+
+     def _resolve_branches(self, all_branches: list) -> list[str]:
+         """Internal method to resolve wildcard patterns."""
+         selected = []
+         for pattern in self.get_branches_to_keep():
+             matched = fnmatch_filter(all_branches, pattern)
+             if not matched:
+                 self.logger.warning(f"Pattern '{pattern}' did not match any branches.")
+             selected.extend(matched)
+         return sorted(list(set(selected)))
+
+     def _load(self) -> pd.DataFrame:
+         """
+         CORE LOGIC (NOT OVERRIDABLE): Loads and processes a single ROOT file.
+         """
+
+         # Process all files in sample
+         df = pd.DataFrame()
+
+         all_root_files = []
+         for key in self._sample_info.keys():
+             files = os.listdir(self._sample_info[key]["folder"])
+             cur_files = []
+             for file_pattern in self._sample_info[key]["file_pattern"]:
+                 for f in fnmatch_filter(files, file_pattern):
+                     cur_files.append(os.path.join(self._sample_info[key]["folder"], f))
+             all_root_files.append(cur_files)
+
+         is_signals = [
+             self._sample_info[key]["is_signal"] for key in self._sample_info.keys()
+         ]
+         self.logger.info("Processing files")
+         for root_files, is_signal in zip(all_root_files, is_signals):
+             self.logger.info(f"Processing files: {root_files}")
+             for root_file in root_files:
+                 if f"{root_file}" == "data/01_raw/samples_dummy.json":
+                     n = 100
+                     # generate dummy features
+                     dummy_data = {}
+                     for branch in self.get_branches_to_keep():
+                         dummy_data[branch] = np.random.randn(n)
+                     if is_signal:
+                         dummy_data["is_signal"] = np.ones(n)
+                     else:
+                         dummy_data["is_signal"] = np.zeros(n)
+
+                     cur_df = pd.DataFrame(dummy_data)
+
+                     # generate a binary target (0/1)
+                     cur_df["y"] = np.random.choice([0, 1], size=n)
+
+                     df = pd.concat([df, cur_df])
+
+                 else:
+                     with uproot.open(f"{root_file}") as f:
+                         tree = f[self.get_tree_name()]
+                         all_branches = tree.keys()
+                         branches_to_load = self._resolve_branches(all_branches)
+
+                         if not branches_to_load:
+                             self.logger.warning(
+                                 f"No valid branches to load for {root_file}. Skipping."
+                             )
+                             continue
+
+                         data = tree.arrays(branches_to_load, cut=self.get_cut())
+
+                         cur_df = self.convert_to_pandas(data)
+
+                         # set background or signal
+                         if is_signal:
+                             cur_df["is_signal"] = [1 for _ in range(len(cur_df))]
+                         else:
+                             cur_df["is_signal"] = [0 for _ in range(len(cur_df))]
+
+                         df = pd.concat([df, cur_df])
+
+         return df
+
+     def _save(self, data: pd.DataFrame) -> pd.DataFrame:
+         return data
+
+     def _describe(self) -> dict:
+         return {"output_sample_info": self._sample_info, "sample_key": self._sample_key}