umami-preprocessing 0.2.2__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {umami_preprocessing-0.2.2 → umami_preprocessing-0.2.4}/PKG-INFO +10 -10
- {umami_preprocessing-0.2.2 → umami_preprocessing-0.2.4}/pyproject.toml +8 -8
- {umami_preprocessing-0.2.2 → umami_preprocessing-0.2.4}/umami_preprocessing.egg-info/PKG-INFO +10 -10
- umami_preprocessing-0.2.4/umami_preprocessing.egg-info/requires.txt +15 -0
- {umami_preprocessing-0.2.2 → umami_preprocessing-0.2.4}/upp/__init__.py +1 -1
- {umami_preprocessing-0.2.2 → umami_preprocessing-0.2.4}/upp/classes/preprocessing_config.py +34 -16
- umami_preprocessing-0.2.4/upp/logger.py +76 -0
- {umami_preprocessing-0.2.2 → umami_preprocessing-0.2.4}/upp/main.py +15 -6
- {umami_preprocessing-0.2.2 → umami_preprocessing-0.2.4}/upp/stages/hist.py +47 -6
- umami_preprocessing-0.2.4/upp/stages/merging.py +307 -0
- {umami_preprocessing-0.2.2 → umami_preprocessing-0.2.4}/upp/stages/normalisation.py +9 -2
- umami_preprocessing-0.2.4/upp/stages/plot.py +198 -0
- {umami_preprocessing-0.2.2 → umami_preprocessing-0.2.4}/upp/stages/resampling.py +192 -74
- umami_preprocessing-0.2.2/umami_preprocessing.egg-info/requires.txt +0 -15
- umami_preprocessing-0.2.2/upp/logger.py +0 -39
- umami_preprocessing-0.2.2/upp/stages/merging.py +0 -176
- umami_preprocessing-0.2.2/upp/stages/plot.py +0 -325
- {umami_preprocessing-0.2.2 → umami_preprocessing-0.2.4}/README.md +0 -0
- {umami_preprocessing-0.2.2 → umami_preprocessing-0.2.4}/setup.cfg +0 -0
- {umami_preprocessing-0.2.2 → umami_preprocessing-0.2.4}/umami_preprocessing.egg-info/SOURCES.txt +0 -0
- {umami_preprocessing-0.2.2 → umami_preprocessing-0.2.4}/umami_preprocessing.egg-info/dependency_links.txt +0 -0
- {umami_preprocessing-0.2.2 → umami_preprocessing-0.2.4}/umami_preprocessing.egg-info/entry_points.txt +0 -0
- {umami_preprocessing-0.2.2 → umami_preprocessing-0.2.4}/umami_preprocessing.egg-info/top_level.txt +0 -0
- {umami_preprocessing-0.2.2 → umami_preprocessing-0.2.4}/upp/classes/__init__.py +0 -0
- {umami_preprocessing-0.2.2 → umami_preprocessing-0.2.4}/upp/classes/components.py +0 -0
- {umami_preprocessing-0.2.2 → umami_preprocessing-0.2.4}/upp/classes/region.py +0 -0
- {umami_preprocessing-0.2.2 → umami_preprocessing-0.2.4}/upp/classes/resampling_config.py +0 -0
- {umami_preprocessing-0.2.2 → umami_preprocessing-0.2.4}/upp/classes/variable_config.py +0 -0
- {umami_preprocessing-0.2.2 → umami_preprocessing-0.2.4}/upp/stages/__init__.py +0 -0
- {umami_preprocessing-0.2.2 → umami_preprocessing-0.2.4}/upp/stages/interpolation.py +0 -0
- {umami_preprocessing-0.2.2 → umami_preprocessing-0.2.4}/upp/utils.py +0 -0
|
@@ -1,25 +1,25 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: umami-preprocessing
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: Preprocessing for jet tagging
|
|
5
5
|
License: MIT
|
|
6
6
|
Project-URL: Homepage, https://github.com/umami-hep/umami-preprocessing
|
|
7
7
|
Requires-Python: <3.12,>=3.8
|
|
8
8
|
Description-Content-Type: text/markdown
|
|
9
9
|
Requires-Dist: pyyaml-include==1.3
|
|
10
|
-
Requires-Dist: PyYAML
|
|
10
|
+
Requires-Dist: PyYAML>=6.0.1
|
|
11
11
|
Requires-Dist: rich==12.6.0
|
|
12
|
-
Requires-Dist: scipy
|
|
13
|
-
Requires-Dist: puma-hep==0.4.
|
|
14
|
-
Requires-Dist: atlas-ftag-tools==0.2.
|
|
12
|
+
Requires-Dist: scipy>=1.15.2
|
|
13
|
+
Requires-Dist: puma-hep==0.4.5
|
|
14
|
+
Requires-Dist: atlas-ftag-tools==0.2.10
|
|
15
15
|
Requires-Dist: dotmap==1.3.30
|
|
16
16
|
Provides-Extra: dev
|
|
17
|
-
Requires-Dist: ruff==0.
|
|
18
|
-
Requires-Dist: mypy==1.
|
|
17
|
+
Requires-Dist: ruff==0.6.2; extra == "dev"
|
|
18
|
+
Requires-Dist: mypy==1.11.2; extra == "dev"
|
|
19
19
|
Requires-Dist: pre-commit==3.5.0; extra == "dev"
|
|
20
|
-
Requires-Dist: pytest>=7.
|
|
20
|
+
Requires-Dist: pytest>=7.2.2; extra == "dev"
|
|
21
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
21
22
|
Requires-Dist: pytest-mock==3.11.1; extra == "dev"
|
|
22
|
-
Requires-Dist: pytest-cov>=3.0.0; extra == "dev"
|
|
23
23
|
|
|
24
24
|
[](https://github.com/psf/black)
|
|
25
25
|
[](https://codecov.io/gh/umami-hep/umami-preprocessing)
|
|
@@ -8,22 +8,22 @@ requires-python = "<3.12,>=3.8"
|
|
|
8
8
|
|
|
9
9
|
dependencies = [
|
|
10
10
|
"pyyaml-include==1.3",
|
|
11
|
-
"PyYAML
|
|
11
|
+
"PyYAML>=6.0.1",
|
|
12
12
|
"rich==12.6.0",
|
|
13
|
-
"scipy
|
|
14
|
-
"puma-hep==0.4.
|
|
15
|
-
"atlas-ftag-tools==0.2.
|
|
13
|
+
"scipy>=1.15.2",
|
|
14
|
+
"puma-hep==0.4.5",
|
|
15
|
+
"atlas-ftag-tools==0.2.10",
|
|
16
16
|
"dotmap==1.3.30"
|
|
17
17
|
]
|
|
18
18
|
|
|
19
19
|
[project.optional-dependencies]
|
|
20
20
|
dev = [
|
|
21
|
-
"ruff==0.
|
|
22
|
-
"mypy==1.
|
|
21
|
+
"ruff==0.6.2",
|
|
22
|
+
"mypy==1.11.2",
|
|
23
23
|
"pre-commit==3.5.0",
|
|
24
|
-
"pytest>=7.
|
|
24
|
+
"pytest>=7.2.2",
|
|
25
|
+
"pytest-cov>=4.0.0",
|
|
25
26
|
"pytest-mock==3.11.1",
|
|
26
|
-
"pytest-cov>=3.0.0",
|
|
27
27
|
]
|
|
28
28
|
|
|
29
29
|
[project.urls]
|
{umami_preprocessing-0.2.2 → umami_preprocessing-0.2.4}/umami_preprocessing.egg-info/PKG-INFO
RENAMED
|
@@ -1,25 +1,25 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: umami-preprocessing
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: Preprocessing for jet tagging
|
|
5
5
|
License: MIT
|
|
6
6
|
Project-URL: Homepage, https://github.com/umami-hep/umami-preprocessing
|
|
7
7
|
Requires-Python: <3.12,>=3.8
|
|
8
8
|
Description-Content-Type: text/markdown
|
|
9
9
|
Requires-Dist: pyyaml-include==1.3
|
|
10
|
-
Requires-Dist: PyYAML
|
|
10
|
+
Requires-Dist: PyYAML>=6.0.1
|
|
11
11
|
Requires-Dist: rich==12.6.0
|
|
12
|
-
Requires-Dist: scipy
|
|
13
|
-
Requires-Dist: puma-hep==0.4.
|
|
14
|
-
Requires-Dist: atlas-ftag-tools==0.2.
|
|
12
|
+
Requires-Dist: scipy>=1.15.2
|
|
13
|
+
Requires-Dist: puma-hep==0.4.5
|
|
14
|
+
Requires-Dist: atlas-ftag-tools==0.2.10
|
|
15
15
|
Requires-Dist: dotmap==1.3.30
|
|
16
16
|
Provides-Extra: dev
|
|
17
|
-
Requires-Dist: ruff==0.
|
|
18
|
-
Requires-Dist: mypy==1.
|
|
17
|
+
Requires-Dist: ruff==0.6.2; extra == "dev"
|
|
18
|
+
Requires-Dist: mypy==1.11.2; extra == "dev"
|
|
19
19
|
Requires-Dist: pre-commit==3.5.0; extra == "dev"
|
|
20
|
-
Requires-Dist: pytest>=7.
|
|
20
|
+
Requires-Dist: pytest>=7.2.2; extra == "dev"
|
|
21
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
21
22
|
Requires-Dist: pytest-mock==3.11.1; extra == "dev"
|
|
22
|
-
Requires-Dist: pytest-cov>=3.0.0; extra == "dev"
|
|
23
23
|
|
|
24
24
|
[](https://github.com/psf/black)
|
|
25
25
|
[](https://codecov.io/gh/umami-hep/umami-preprocessing)
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
pyyaml-include==1.3
|
|
2
|
+
PyYAML>=6.0.1
|
|
3
|
+
rich==12.6.0
|
|
4
|
+
scipy>=1.15.2
|
|
5
|
+
puma-hep==0.4.5
|
|
6
|
+
atlas-ftag-tools==0.2.10
|
|
7
|
+
dotmap==1.3.30
|
|
8
|
+
|
|
9
|
+
[dev]
|
|
10
|
+
ruff==0.6.2
|
|
11
|
+
mypy==1.11.2
|
|
12
|
+
pre-commit==3.5.0
|
|
13
|
+
pytest>=7.2.2
|
|
14
|
+
pytest-cov>=4.0.0
|
|
15
|
+
pytest-mock==3.11.1
|
|
@@ -53,42 +53,56 @@ class PreprocessingConfig:
|
|
|
53
53
|
|
|
54
54
|
Parameters
|
|
55
55
|
----------
|
|
56
|
+
config_path : Path
|
|
57
|
+
Path to the config yaml file that is used. Does not need to be set in config.
|
|
58
|
+
split : Split
|
|
59
|
+
For which part the preprocessing is run. Either train, val or test. This needs
|
|
60
|
+
to be set as a command line argument when running the program. Does not need
|
|
61
|
+
to be set in config.
|
|
62
|
+
config : dict
|
|
63
|
+
Dict with the loaded config. Does not need to be set in config.
|
|
56
64
|
base_dir : Path
|
|
57
65
|
Base directory for all other paths.
|
|
58
|
-
ntuple_dir : Path
|
|
66
|
+
ntuple_dir : Path, optional
|
|
59
67
|
Directory containing the input h5 ntuples. If a relative path is given, it is
|
|
60
|
-
interpreted as relative to base_dir.
|
|
61
|
-
components_dir : Path
|
|
68
|
+
interpreted as relative to base_dir. By default Path("ntuples")
|
|
69
|
+
components_dir : Path, optional
|
|
62
70
|
Directory for intermediate component files. If a relative path is given, it is
|
|
63
|
-
interpreted as relative to base_dir.
|
|
64
|
-
out_dir : Path
|
|
71
|
+
interpreted as relative to base_dir. By default Path("components")
|
|
72
|
+
out_dir : Path, optional
|
|
65
73
|
Directory for output files. If a relative path is given, it is interpreted as
|
|
66
|
-
relative to base_dir.
|
|
67
|
-
out_fname : Path
|
|
68
|
-
Filename stem for the output files.
|
|
69
|
-
batch_size : int
|
|
74
|
+
relative to base_dir. By default Path("output")
|
|
75
|
+
out_fname : Path, optional
|
|
76
|
+
Filename stem for the output files. By default Path("pp_output.h5")
|
|
77
|
+
batch_size : int, optional
|
|
70
78
|
Batch size for the preprocessing. For each batch select
|
|
71
79
|
`sampling_fraction*batch_size_after_cuts`. It is recommended to choose high batch sizes
|
|
72
80
|
especially for the `countup` method to achieve best agreement of target and resampled
|
|
73
|
-
distributions.
|
|
74
|
-
num_jets_estimate : int
|
|
81
|
+
distributions. By default 100_000
|
|
82
|
+
num_jets_estimate : int, optional
|
|
75
83
|
Any of the further four arguments that are not specified will default to this value.
|
|
76
84
|
Is equal to 1_000_000 by default.
|
|
77
|
-
num_jets_estimate_available : int
|
|
85
|
+
num_jets_estimate_available : int, optional
|
|
78
86
|
A subsample taken from the whole sample to estimate the number of jets after the cuts.
|
|
79
87
|
Please keep this number high in order to not get a Poisson error of more than 5%.
|
|
80
88
|
If time allows you can use -1 to get a precise number of jets and not just an estimate
|
|
81
89
|
although it will be slow for large datasets. Is equal to num_jets_estimate by default.
|
|
82
|
-
num_jets_estimate_hist : int
|
|
90
|
+
num_jets_estimate_hist : int, optional
|
|
83
91
|
Number of jets of each flavour that are used to construct histograms for probability
|
|
84
92
|
density function estimation. Larger numbers give a better quality estimate of the pdfs.
|
|
85
93
|
Is equal to num_jets_estimate by default.
|
|
86
|
-
num_jets_estimate_norm : int
|
|
94
|
+
num_jets_estimate_norm : int, optional
|
|
87
95
|
Number of jets of each flavour that are used to estimate shifting and scaling during
|
|
88
96
|
normalisation step. Larger numbers give better quality estimates.
|
|
89
97
|
Is equal to num_jets_estimate by default.
|
|
90
|
-
|
|
91
|
-
|
|
98
|
+
num_jets_estimate_plotting : int, optional
|
|
99
|
+
Number of jets of each flavour used for plotting the initial and the final resampling
|
|
100
|
+
variable distributions. Larger numbers give a better estimate of the full distributions.
|
|
101
|
+
Is equal to num_jets_estimate by default.
|
|
102
|
+
merge_test_samples : bool, optional
|
|
103
|
+
Merge the test samples of the different processes into one file. By default False.
|
|
104
|
+
jets_name : str, optional
|
|
105
|
+
Name of the jets dataset in the input file. By default "jets".
|
|
92
106
|
"""
|
|
93
107
|
|
|
94
108
|
config_path: Path
|
|
@@ -104,9 +118,11 @@ class PreprocessingConfig:
|
|
|
104
118
|
num_jets_estimate_available: int | None = None
|
|
105
119
|
num_jets_estimate_hist: int | None = None
|
|
106
120
|
num_jets_estimate_norm: int | None = None
|
|
121
|
+
num_jets_estimate_plotting: int | None = None
|
|
107
122
|
merge_test_samples: bool = False
|
|
108
123
|
jets_name: str = "jets"
|
|
109
124
|
flavour_config: Path | None = None
|
|
125
|
+
num_jets_per_output_file: int | None = None
|
|
110
126
|
|
|
111
127
|
def __post_init__(self):
|
|
112
128
|
# postprocess paths
|
|
@@ -117,6 +133,8 @@ class PreprocessingConfig:
|
|
|
117
133
|
self.num_jets_estimate_hist = self.num_jets_estimate
|
|
118
134
|
if self.num_jets_estimate_norm is None:
|
|
119
135
|
self.num_jets_estimate_norm = self.num_jets_estimate
|
|
136
|
+
if self.num_jets_estimate_plotting is None:
|
|
137
|
+
self.num_jets_estimate_plotting = self.num_jets_estimate
|
|
120
138
|
|
|
121
139
|
for field in dataclasses.fields(self):
|
|
122
140
|
if field.type == "Path" and field.name != "out_fname" and field.name != "base_dir":
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import sys
|
|
5
|
+
from functools import partial
|
|
6
|
+
|
|
7
|
+
from rich.console import Console
|
|
8
|
+
from rich.logging import RichHandler
|
|
9
|
+
from rich.progress import (
|
|
10
|
+
BarColumn,
|
|
11
|
+
Progress,
|
|
12
|
+
TextColumn,
|
|
13
|
+
TimeElapsedColumn,
|
|
14
|
+
TimeRemainingColumn,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
# Detect if the program is executed in an interactive terminal
|
|
18
|
+
_IS_TTY = sys.stderr.isatty()
|
|
19
|
+
|
|
20
|
+
# One console object is reused everywhere so that Rich keeps a consistent idea
|
|
21
|
+
# of whether it may emit ANSI control codes / animations.
|
|
22
|
+
_console = Console(
|
|
23
|
+
width=100,
|
|
24
|
+
force_terminal=_IS_TTY,
|
|
25
|
+
force_interactive=_IS_TTY,
|
|
26
|
+
no_color=not _IS_TTY,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
# Template for the progress bar
|
|
30
|
+
ProgressBar = partial(
|
|
31
|
+
Progress,
|
|
32
|
+
TextColumn("[task.description]{task.description}"),
|
|
33
|
+
BarColumn(),
|
|
34
|
+
TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
|
|
35
|
+
TextColumn("•"),
|
|
36
|
+
TimeRemainingColumn(),
|
|
37
|
+
TextColumn("•"),
|
|
38
|
+
TimeElapsedColumn(),
|
|
39
|
+
refresh_per_second=1 if _IS_TTY else 0.05,
|
|
40
|
+
speed_estimate_period=30 if _IS_TTY else 120,
|
|
41
|
+
console=_console,
|
|
42
|
+
disable=not _IS_TTY,
|
|
43
|
+
transient=_IS_TTY,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# Helper for setting up the logger
|
|
48
|
+
def setup_logger(level: str = "INFO"):
|
|
49
|
+
"""Set up the logger.
|
|
50
|
+
|
|
51
|
+
Configure Rich logging so that colourful / interactive output is used when
|
|
52
|
+
the program is attached to a terminal and plain text is written when it is
|
|
53
|
+
executed under a batch system such as Slurm (where stdout / stderr are files).
|
|
54
|
+
"""
|
|
55
|
+
FORMAT = "%(message)s"
|
|
56
|
+
|
|
57
|
+
# In a batch job we create a console that never emits colour codes.
|
|
58
|
+
console = None
|
|
59
|
+
if not _IS_TTY:
|
|
60
|
+
console = Console(
|
|
61
|
+
width=120,
|
|
62
|
+
force_terminal=False,
|
|
63
|
+
force_interactive=False,
|
|
64
|
+
no_color=True,
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
handler = RichHandler(
|
|
68
|
+
show_time=False,
|
|
69
|
+
show_path=False,
|
|
70
|
+
markup=True,
|
|
71
|
+
rich_tracebacks=True,
|
|
72
|
+
console=console,
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
logging.basicConfig(level=level, format=FORMAT, handlers=[handler])
|
|
76
|
+
return logging
|
|
@@ -21,7 +21,7 @@ from upp.logger import setup_logger
|
|
|
21
21
|
from upp.stages.hist import create_histograms
|
|
22
22
|
from upp.stages.merging import Merging
|
|
23
23
|
from upp.stages.normalisation import Normalisation
|
|
24
|
-
from upp.stages.plot import
|
|
24
|
+
from upp.stages.plot import plot_resampling_dists
|
|
25
25
|
from upp.stages.resampling import Resampling
|
|
26
26
|
|
|
27
27
|
|
|
@@ -41,10 +41,16 @@ def parse_args(args):
|
|
|
41
41
|
parser.add_argument("--no-plot", dest="plot", action="store_false")
|
|
42
42
|
splits = ["train", "val", "test", "all"]
|
|
43
43
|
parser.add_argument("--split", default="train", choices=splits, help="Which file to produce")
|
|
44
|
+
parser.add_argument(
|
|
45
|
+
"--component", default=None, help="Component which is processed during --prep"
|
|
46
|
+
)
|
|
47
|
+
parser.add_argument(
|
|
48
|
+
"--region", default=None, help="Region which is processed during --resample"
|
|
49
|
+
)
|
|
44
50
|
|
|
45
51
|
args = parser.parse_args(args)
|
|
46
52
|
d = vars(args)
|
|
47
|
-
ignore = ["config", "split"]
|
|
53
|
+
ignore = ["config", "split", "component", "region"]
|
|
48
54
|
if not any(v for a, v in d.items() if a not in ignore):
|
|
49
55
|
for v in d:
|
|
50
56
|
if v not in ignore and d[v] is None:
|
|
@@ -65,12 +71,15 @@ def run_pp(args) -> None:
|
|
|
65
71
|
|
|
66
72
|
# create virtual datasets and pdf files
|
|
67
73
|
if args.prep and args.split == "train":
|
|
68
|
-
create_histograms(
|
|
74
|
+
create_histograms(
|
|
75
|
+
config=config,
|
|
76
|
+
component_to_run=args.component,
|
|
77
|
+
)
|
|
69
78
|
|
|
70
79
|
# run the resampling
|
|
71
80
|
if args.resample:
|
|
72
81
|
resampling = Resampling(config)
|
|
73
|
-
resampling.run()
|
|
82
|
+
resampling.run(region=args.region, component=args.component)
|
|
74
83
|
|
|
75
84
|
# run the merging
|
|
76
85
|
if args.merge:
|
|
@@ -86,8 +95,8 @@ def run_pp(args) -> None:
|
|
|
86
95
|
if args.plot:
|
|
87
96
|
title = " Plotting "
|
|
88
97
|
log.info(f"[bold green]{title:-^100}")
|
|
89
|
-
|
|
90
|
-
|
|
98
|
+
plot_resampling_dists(config=config, stage="initial")
|
|
99
|
+
plot_resampling_dists(config=config, stage=args.split)
|
|
91
100
|
|
|
92
101
|
# print end info
|
|
93
102
|
end = datetime.now()
|
|
@@ -5,6 +5,7 @@ import logging as log
|
|
|
5
5
|
import math
|
|
6
6
|
from dataclasses import dataclass
|
|
7
7
|
from pathlib import Path
|
|
8
|
+
from typing import TYPE_CHECKING
|
|
8
9
|
|
|
9
10
|
import h5py
|
|
10
11
|
import numpy as np
|
|
@@ -13,6 +14,9 @@ from scipy.stats import binned_statistic_dd
|
|
|
13
14
|
|
|
14
15
|
from upp.logger import setup_logger
|
|
15
16
|
|
|
17
|
+
if TYPE_CHECKING: # pragma: no cover
|
|
18
|
+
from upp.classes.preprocessing_config import PreprocessingConfig
|
|
19
|
+
|
|
16
20
|
|
|
17
21
|
def bin_jets(array: dict, bins: list) -> np.ndarray:
|
|
18
22
|
"""Create the histogram and bins for the given resampling variables.
|
|
@@ -117,24 +121,39 @@ class Hist:
|
|
|
117
121
|
return f["pbin"][:]
|
|
118
122
|
|
|
119
123
|
|
|
120
|
-
def create_histograms(
|
|
124
|
+
def create_histograms(
|
|
125
|
+
config: PreprocessingConfig,
|
|
126
|
+
component_to_run: str | None = None,
|
|
127
|
+
) -> None:
|
|
121
128
|
"""Create the virtual datasets and pdf files.
|
|
122
129
|
|
|
123
130
|
Parameters
|
|
124
131
|
----------
|
|
125
132
|
config : PreprocessingConfig object
|
|
126
133
|
PreprocessingConfig object of the current preprocessing.
|
|
134
|
+
component_to_run : str | None
|
|
135
|
+
Component which should be run. By default (None), all components
|
|
136
|
+
are processed sequentially.
|
|
127
137
|
"""
|
|
138
|
+
# Setup the logger and load the variables used for resampling
|
|
128
139
|
setup_logger()
|
|
140
|
+
sampl_vars = config.sampl_cfg.vars
|
|
129
141
|
|
|
130
142
|
title = " Writing PDFs "
|
|
131
143
|
log.info(f"[bold green]{title:-^100}")
|
|
132
|
-
|
|
133
144
|
log.info(f"[bold green]Estimating PDFs using {config.num_jets_estimate_hist:,} jets...")
|
|
134
|
-
|
|
145
|
+
|
|
146
|
+
# Create check variable to ensure at least one component was processed
|
|
147
|
+
component_processed = not component_to_run
|
|
148
|
+
|
|
149
|
+
# Process the different components
|
|
135
150
|
for component in config.components:
|
|
151
|
+
# Check if only one component should be processed
|
|
152
|
+
if isinstance(component_to_run, str) and component_to_run != component.name:
|
|
153
|
+
continue
|
|
154
|
+
|
|
136
155
|
log.info(f"Estimating {component} PDF using {config.num_jets_estimate_hist:,} samples...")
|
|
137
|
-
component.setup_reader(config.batch_size, config.jets_name)
|
|
156
|
+
component.setup_reader(batch_size=config.batch_size, jets_name=config.jets_name)
|
|
138
157
|
cuts_no_split = component.cuts.ignore(["eventNumber"])
|
|
139
158
|
|
|
140
159
|
###
|
|
@@ -146,7 +165,29 @@ def create_histograms(config) -> None:
|
|
|
146
165
|
silent=False,
|
|
147
166
|
raise_error=False,
|
|
148
167
|
)
|
|
149
|
-
|
|
150
|
-
|
|
168
|
+
|
|
169
|
+
# Load the jets from file used for resampling
|
|
170
|
+
jets = component.get_jets(
|
|
171
|
+
variables=sampl_vars,
|
|
172
|
+
num_jets=config.num_jets_estimate_hist,
|
|
173
|
+
cuts=cuts_no_split,
|
|
174
|
+
)
|
|
175
|
+
|
|
176
|
+
# Write out the hist used for resampling
|
|
177
|
+
component.hist.write_hist(
|
|
178
|
+
jets=jets,
|
|
179
|
+
resampling_vars=sampl_vars,
|
|
180
|
+
bins=config.sampl_cfg.flat_bins,
|
|
181
|
+
)
|
|
182
|
+
|
|
183
|
+
# Set the check variable to true
|
|
184
|
+
component_processed = True
|
|
185
|
+
|
|
186
|
+
# Raise an error if no component was processed
|
|
187
|
+
if component_processed is False:
|
|
188
|
+
raise ValueError(
|
|
189
|
+
"No component processed during resampling! Check that you correctly spelled "
|
|
190
|
+
"the component name when running with --component!"
|
|
191
|
+
)
|
|
151
192
|
|
|
152
193
|
log.info(f"[bold green]Saved to {config.components[0].hist.path.parent}/")
|