umami-preprocessing 0.3.0__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {umami_preprocessing-0.3.0/umami_preprocessing.egg-info → umami_preprocessing-0.3.1}/PKG-INFO +15 -17
- {umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1}/README.md +2 -1
- {umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1}/pyproject.toml +18 -7
- {umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1/umami_preprocessing.egg-info}/PKG-INFO +15 -17
- {umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1}/umami_preprocessing.egg-info/SOURCES.txt +1 -0
- umami_preprocessing-0.3.1/umami_preprocessing.egg-info/requires.txt +8 -0
- {umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1}/upp/__init__.py +1 -1
- {umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1}/upp/classes/__init__.py +2 -0
- {umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1}/upp/classes/components.py +15 -6
- umami_preprocessing-0.3.1/upp/classes/plotting_config.py +121 -0
- {umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1}/upp/classes/preprocessing_config.py +47 -3
- {umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1}/upp/classes/resampling_config.py +4 -3
- {umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1}/upp/main.py +13 -7
- {umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1}/upp/stages/hist.py +5 -1
- {umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1}/upp/stages/merging.py +21 -12
- {umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1}/upp/stages/normalisation.py +6 -2
- umami_preprocessing-0.3.1/upp/stages/plot.py +762 -0
- {umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1}/upp/stages/resampling.py +68 -36
- {umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1}/upp/stages/reweight.py +44 -23
- {umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1}/upp/stages/rw_merge.py +10 -15
- {umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1}/upp/stages/split_containers.py +1 -1
- {umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1}/upp/utils/check_input_samples.py +1 -0
- umami_preprocessing-0.3.0/umami_preprocessing.egg-info/requires.txt +0 -20
- umami_preprocessing-0.3.0/upp/stages/plot.py +0 -209
- {umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1}/LICENSE +0 -0
- {umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1}/MANIFEST.in +0 -0
- {umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1}/setup.cfg +0 -0
- {umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1}/umami_preprocessing.egg-info/dependency_links.txt +0 -0
- {umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1}/umami_preprocessing.egg-info/entry_points.txt +0 -0
- {umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1}/umami_preprocessing.egg-info/top_level.txt +0 -0
- {umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1}/upp/classes/region.py +0 -0
- {umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1}/upp/classes/reweight_config.py +0 -0
- {umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1}/upp/classes/variable_config.py +0 -0
- {umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1}/upp/stages/__init__.py +7 -7
- {umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1}/upp/stages/interpolation.py +0 -0
- {umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1}/upp/utils/__init__.py +1 -1
- {umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1}/upp/utils/logger.py +0 -0
- {umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1}/upp/utils/tools.py +0 -0
{umami_preprocessing-0.3.0/umami_preprocessing.egg-info → umami_preprocessing-0.3.1}/PKG-INFO
RENAMED
|
@@ -1,36 +1,34 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: umami-preprocessing
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: ATLAS Flavour Tagging Preprocessing - Umami PreProcessing (UPP)
|
|
5
5
|
Author: Alexander Froch
|
|
6
|
-
License:
|
|
6
|
+
License-Expression: Apache-2.0
|
|
7
7
|
Project-URL: Homepage, https://github.com/umami-hep/umami-preprocessing
|
|
8
8
|
Project-URL: Issue Tracker, https://github.com/umami-hep/umami-preprocessing/issues
|
|
9
|
-
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Science/Research
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering :: Physics
|
|
17
|
+
Requires-Python: <3.15,>=3.11
|
|
10
18
|
Description-Content-Type: text/markdown
|
|
11
19
|
License-File: LICENSE
|
|
12
|
-
Requires-Dist: atlas-ftag-tools==0.3.
|
|
20
|
+
Requires-Dist: atlas-ftag-tools==0.3.3
|
|
13
21
|
Requires-Dist: dotmap>=1.3.30
|
|
14
22
|
Requires-Dist: numpy>=2.2.6
|
|
15
|
-
Requires-Dist: puma-hep==0.5.
|
|
23
|
+
Requires-Dist: puma-hep==0.5.3
|
|
16
24
|
Requires-Dist: pyyaml-include==1.3
|
|
17
25
|
Requires-Dist: PyYAML>=6.0.2
|
|
18
26
|
Requires-Dist: rich>=14.1.0
|
|
19
27
|
Requires-Dist: scipy>=1.15.3
|
|
20
|
-
Provides-Extra: dev
|
|
21
|
-
Requires-Dist: coverage>=7.10.6; extra == "dev"
|
|
22
|
-
Requires-Dist: ipykernel>=6.30.1; extra == "dev"
|
|
23
|
-
Requires-Dist: mypy>=1.18.1; extra == "dev"
|
|
24
|
-
Requires-Dist: pre-commit>=4.3.0; extra == "dev"
|
|
25
|
-
Requires-Dist: pydoclint>=0.7.3; extra == "dev"
|
|
26
|
-
Requires-Dist: pytest_notebook>=0.10.0; extra == "dev"
|
|
27
|
-
Requires-Dist: pytest-cov>=7.0.0; extra == "dev"
|
|
28
|
-
Requires-Dist: pytest-randomly>=4.0.1; extra == "dev"
|
|
29
|
-
Requires-Dist: pytest>=8.4.2; extra == "dev"
|
|
30
|
-
Requires-Dist: ruff>=0.13.0; extra == "dev"
|
|
31
28
|
Dynamic: license-file
|
|
32
29
|
|
|
33
|
-
[](https://github.com/astral-sh/ruff)
|
|
31
|
+
[](https://opensource.org/licenses/Apache-2.0)
|
|
34
32
|
[](https://codecov.io/gh/umami-hep/umami-preprocessing)
|
|
35
33
|
[](https://badge.fury.io/py/umami-preprocessing)
|
|
36
34
|
[](https://umami-hep.github.io/umami-preprocessing//)
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
[](https://github.com/astral-sh/ruff)
|
|
2
|
+
[](https://opensource.org/licenses/Apache-2.0)
|
|
2
3
|
[](https://codecov.io/gh/umami-hep/umami-preprocessing)
|
|
3
4
|
[](https://badge.fury.io/py/umami-preprocessing)
|
|
4
5
|
[](https://umami-hep.github.io/umami-preprocessing//)
|
|
@@ -3,29 +3,39 @@ name = "umami-preprocessing"
|
|
|
3
3
|
description = "ATLAS Flavour Tagging Preprocessing - Umami PreProcessing (UPP)"
|
|
4
4
|
authors = [{name="Alexander Froch"}]
|
|
5
5
|
dynamic = ["version"]
|
|
6
|
-
license =
|
|
6
|
+
license = "Apache-2.0"
|
|
7
|
+
license-files = ["LICENSE"]
|
|
7
8
|
readme = "README.md"
|
|
8
|
-
requires-python = ">=3.
|
|
9
|
+
requires-python = ">=3.11,<3.15"
|
|
10
|
+
classifiers = [
|
|
11
|
+
"Development Status :: 4 - Beta",
|
|
12
|
+
"Intended Audience :: Science/Research",
|
|
13
|
+
"Programming Language :: Python :: 3",
|
|
14
|
+
"Programming Language :: Python :: 3.11",
|
|
15
|
+
"Programming Language :: Python :: 3.12",
|
|
16
|
+
"Programming Language :: Python :: 3.13",
|
|
17
|
+
"Programming Language :: Python :: 3.14",
|
|
18
|
+
"Topic :: Scientific/Engineering :: Physics",
|
|
19
|
+
]
|
|
9
20
|
|
|
10
21
|
dependencies = [
|
|
11
|
-
"atlas-ftag-tools==0.3.
|
|
22
|
+
"atlas-ftag-tools==0.3.3",
|
|
12
23
|
"dotmap>=1.3.30",
|
|
13
24
|
"numpy>=2.2.6",
|
|
14
|
-
"puma-hep==0.5.
|
|
25
|
+
"puma-hep==0.5.3",
|
|
15
26
|
"pyyaml-include==1.3",
|
|
16
27
|
"PyYAML>=6.0.2",
|
|
17
28
|
"rich>=14.1.0",
|
|
18
29
|
"scipy>=1.15.3",
|
|
19
30
|
]
|
|
20
31
|
|
|
21
|
-
[project.optional-dependencies]
|
|
32
|
+
[dependency-groups]
|
|
22
33
|
dev = [
|
|
23
34
|
"coverage>=7.10.6",
|
|
24
35
|
"ipykernel>=6.30.1",
|
|
25
36
|
"mypy>=1.18.1",
|
|
26
37
|
"pre-commit>=4.3.0",
|
|
27
38
|
"pydoclint>=0.7.3",
|
|
28
|
-
"pytest_notebook>=0.10.0",
|
|
29
39
|
"pytest-cov>=7.0.0",
|
|
30
40
|
"pytest-randomly>=4.0.1",
|
|
31
41
|
"pytest>=8.4.2",
|
|
@@ -48,10 +58,11 @@ include-package-data = true
|
|
|
48
58
|
version = {attr = "upp.__version__"}
|
|
49
59
|
|
|
50
60
|
[build-system]
|
|
51
|
-
requires = ["setuptools>=
|
|
61
|
+
requires = ["setuptools>=77"]
|
|
52
62
|
build-backend = "setuptools.build_meta"
|
|
53
63
|
|
|
54
64
|
[tool.ruff]
|
|
65
|
+
target-version = "py311"
|
|
55
66
|
lint.select = ["I", "E", "W", "F", "B", "UP", "ARG", "SIM", "TID", "RUF", "D2", "D3", "D4"]
|
|
56
67
|
lint.ignore = ["RUF005"]
|
|
57
68
|
line-length = 100
|
{umami_preprocessing-0.3.0 → umami_preprocessing-0.3.1/umami_preprocessing.egg-info}/PKG-INFO
RENAMED
|
@@ -1,36 +1,34 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: umami-preprocessing
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: ATLAS Flavour Tagging Preprocessing - Umami PreProcessing (UPP)
|
|
5
5
|
Author: Alexander Froch
|
|
6
|
-
License:
|
|
6
|
+
License-Expression: Apache-2.0
|
|
7
7
|
Project-URL: Homepage, https://github.com/umami-hep/umami-preprocessing
|
|
8
8
|
Project-URL: Issue Tracker, https://github.com/umami-hep/umami-preprocessing/issues
|
|
9
|
-
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Science/Research
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering :: Physics
|
|
17
|
+
Requires-Python: <3.15,>=3.11
|
|
10
18
|
Description-Content-Type: text/markdown
|
|
11
19
|
License-File: LICENSE
|
|
12
|
-
Requires-Dist: atlas-ftag-tools==0.3.
|
|
20
|
+
Requires-Dist: atlas-ftag-tools==0.3.3
|
|
13
21
|
Requires-Dist: dotmap>=1.3.30
|
|
14
22
|
Requires-Dist: numpy>=2.2.6
|
|
15
|
-
Requires-Dist: puma-hep==0.5.
|
|
23
|
+
Requires-Dist: puma-hep==0.5.3
|
|
16
24
|
Requires-Dist: pyyaml-include==1.3
|
|
17
25
|
Requires-Dist: PyYAML>=6.0.2
|
|
18
26
|
Requires-Dist: rich>=14.1.0
|
|
19
27
|
Requires-Dist: scipy>=1.15.3
|
|
20
|
-
Provides-Extra: dev
|
|
21
|
-
Requires-Dist: coverage>=7.10.6; extra == "dev"
|
|
22
|
-
Requires-Dist: ipykernel>=6.30.1; extra == "dev"
|
|
23
|
-
Requires-Dist: mypy>=1.18.1; extra == "dev"
|
|
24
|
-
Requires-Dist: pre-commit>=4.3.0; extra == "dev"
|
|
25
|
-
Requires-Dist: pydoclint>=0.7.3; extra == "dev"
|
|
26
|
-
Requires-Dist: pytest_notebook>=0.10.0; extra == "dev"
|
|
27
|
-
Requires-Dist: pytest-cov>=7.0.0; extra == "dev"
|
|
28
|
-
Requires-Dist: pytest-randomly>=4.0.1; extra == "dev"
|
|
29
|
-
Requires-Dist: pytest>=8.4.2; extra == "dev"
|
|
30
|
-
Requires-Dist: ruff>=0.13.0; extra == "dev"
|
|
31
28
|
Dynamic: license-file
|
|
32
29
|
|
|
33
|
-
[](https://github.com/astral-sh/ruff)
|
|
31
|
+
[](https://opensource.org/licenses/Apache-2.0)
|
|
34
32
|
[](https://codecov.io/gh/umami-hep/umami-preprocessing)
|
|
35
33
|
[](https://badge.fury.io/py/umami-preprocessing)
|
|
36
34
|
[](https://umami-hep.github.io/umami-preprocessing//)
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
from upp.classes.components import Component, Components
|
|
6
|
+
from upp.classes.plotting_config import PlottingConfig
|
|
6
7
|
from upp.classes.preprocessing_config import PreprocessingConfig
|
|
7
8
|
from upp.classes.region import Region
|
|
8
9
|
from upp.classes.resampling_config import ResamplingConfig
|
|
@@ -11,6 +12,7 @@ from upp.classes.variable_config import VariableConfig
|
|
|
11
12
|
__all__ = [
|
|
12
13
|
"Component",
|
|
13
14
|
"Components",
|
|
15
|
+
"PlottingConfig",
|
|
14
16
|
"PreprocessingConfig",
|
|
15
17
|
"Region",
|
|
16
18
|
"ResamplingConfig",
|
|
@@ -86,6 +86,9 @@ class Component:
|
|
|
86
86
|
if fname is None:
|
|
87
87
|
fname = self.sample.path
|
|
88
88
|
|
|
89
|
+
if "vds_dir" not in kwargs and self.sample.vds_dir is not None:
|
|
90
|
+
kwargs["vds_dir"] = self.sample.vds_dir
|
|
91
|
+
|
|
89
92
|
self.reader = H5Reader(
|
|
90
93
|
fname=fname,
|
|
91
94
|
batch_size=batch_size,
|
|
@@ -106,7 +109,8 @@ class Component:
|
|
|
106
109
|
Name of the group in which the jets are stored, by default "jets"
|
|
107
110
|
"""
|
|
108
111
|
dtypes = self.reader.dtypes(variables.combined())
|
|
109
|
-
|
|
112
|
+
# num_jets == -1 ("write all") -> 0 leading dim so the writer grows dynamically
|
|
113
|
+
shapes = self.reader.shapes(max(self.num_jets, 0), variables.keys())
|
|
110
114
|
self.writer = H5Writer(self.out_path, dtypes, shapes, jets_name=jets_name)
|
|
111
115
|
log.debug(f"Setup component writer at: {self.out_path}")
|
|
112
116
|
|
|
@@ -209,6 +213,10 @@ class Component:
|
|
|
209
213
|
ValueError
|
|
210
214
|
If more jets are requested than available
|
|
211
215
|
"""
|
|
216
|
+
# num_req < 0 means "use all available jets" - nothing to check
|
|
217
|
+
if num_req < 0:
|
|
218
|
+
return
|
|
219
|
+
|
|
212
220
|
# Check if num_jets jets are available after the cuts and sampling fraction
|
|
213
221
|
num_est = (
|
|
214
222
|
None if self.num_jets_estimate_available <= 0 else self.num_jets_estimate_available
|
|
@@ -313,9 +321,9 @@ class Components:
|
|
|
313
321
|
component_list = []
|
|
314
322
|
for component in config.config["components"]:
|
|
315
323
|
# Ensure equal_jets flag is correctly set
|
|
316
|
-
assert (
|
|
317
|
-
"equal_jets
|
|
318
|
-
)
|
|
324
|
+
assert "equal_jets" not in component, (
|
|
325
|
+
"equal_jets flag should be set in the sample config"
|
|
326
|
+
)
|
|
319
327
|
|
|
320
328
|
# Get the region cuts
|
|
321
329
|
region_cuts = (
|
|
@@ -337,6 +345,7 @@ class Components:
|
|
|
337
345
|
ntuple_dir=config.ntuple_dir,
|
|
338
346
|
name=component["sample"]["name"],
|
|
339
347
|
skip_checks=config.skip_checks,
|
|
348
|
+
vds_dir=config.vds_dir,
|
|
340
349
|
)
|
|
341
350
|
|
|
342
351
|
# Create the Component instances for the different flavours
|
|
@@ -360,8 +369,8 @@ class Components:
|
|
|
360
369
|
)
|
|
361
370
|
components = cls(component_list)
|
|
362
371
|
|
|
363
|
-
# Check the flavour ratios
|
|
364
|
-
if
|
|
372
|
+
# Check the flavour ratios (not meaningful when resampling is skipped)
|
|
373
|
+
if not config.skip_resampling:
|
|
365
374
|
components.check_flavour_ratios()
|
|
366
375
|
|
|
367
376
|
return components
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def _default_variable_labels() -> dict[str, str]:
|
|
7
|
+
return {
|
|
8
|
+
"pt": "Jet $p_\\mathrm{T}$ [GeV]",
|
|
9
|
+
"eta": "Jet $|\\eta|$",
|
|
10
|
+
"mass": "Jet Mass [GeV]",
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _default_sample_labels() -> dict[str, str]:
|
|
15
|
+
return {
|
|
16
|
+
"ttbar": "$t\\bar{t}$",
|
|
17
|
+
"zprime": "$Z'$",
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
|
|
22
|
+
class PlottingConfig:
|
|
23
|
+
r"""
|
|
24
|
+
Options for the preprocessing resampling distribution plots.
|
|
25
|
+
|
|
26
|
+
These options are specified in the config file under the `plotting:` key.
|
|
27
|
+
Any omitted option uses the default defined by this class.
|
|
28
|
+
|
|
29
|
+
Attributes
|
|
30
|
+
----------
|
|
31
|
+
num_jets_plotting : int | None, optional
|
|
32
|
+
Number of jets loaded for plotting. If not set, use the global
|
|
33
|
+
`num_jets_estimate_plotting` value. By default None.
|
|
34
|
+
variable_labels : dict[str, str], optional
|
|
35
|
+
Display labels for plotted variables. Keys are matched case-insensitively
|
|
36
|
+
against variable names, with the longest matching key taking precedence.
|
|
37
|
+
User-provided labels are merged with the default pT, eta, and mass labels.
|
|
38
|
+
sample_labels : dict[str, str], optional
|
|
39
|
+
Display labels for input samples. User-provided labels are merged with the
|
|
40
|
+
default ttbar and zprime labels.
|
|
41
|
+
ylabel : str, optional
|
|
42
|
+
Label for the y-axis. The `{jets_name}` placeholder is replaced with the
|
|
43
|
+
configured jet dataset name. By default "Normalised Number of {jets_name}".
|
|
44
|
+
atlas_first_tag : str, optional
|
|
45
|
+
First ATLAS plot label. By default "Simulation Internal".
|
|
46
|
+
atlas_second_tag : str, optional
|
|
47
|
+
Second ATLAS plot label. By default "$\\sqrt{s} = 13/13.6\\,\\mathrm{TeV}$".
|
|
48
|
+
show_num_jets : bool, optional
|
|
49
|
+
Decide, if the number of jets is shown in the ATLAS second tag
|
|
50
|
+
output_formats : list[str], optional
|
|
51
|
+
File formats in which each plot is saved. By default `["pdf", "png"]`.
|
|
52
|
+
linestyles : list[str], optional
|
|
53
|
+
Linestyles used to distinguish input samples. By default
|
|
54
|
+
`["-", "--", "-.", ":"]`.
|
|
55
|
+
bins : int, optional
|
|
56
|
+
Number of histogram bins. By default 50.
|
|
57
|
+
norm : bool, optional
|
|
58
|
+
Normalise each histogram before plotting. By default True.
|
|
59
|
+
underoverflow : bool, optional
|
|
60
|
+
Include underflow and overflow values in the edge bins. By default True.
|
|
61
|
+
y_scale : float, optional
|
|
62
|
+
Scale factor applied to the automatically determined y-axis range.
|
|
63
|
+
By default 1.5.
|
|
64
|
+
figsize : list[float], optional
|
|
65
|
+
Figure width and height. By default `[6, 4]`.
|
|
66
|
+
logy : bool, optional
|
|
67
|
+
Use a logarithmic y-axis. By default True.
|
|
68
|
+
legend_location : str, optional
|
|
69
|
+
Location of the flavour legend. By default "upper right".
|
|
70
|
+
linestyle_legend_location : str, optional
|
|
71
|
+
Location of the sample-linestyle legend. By default "upper center".
|
|
72
|
+
linestyle_legend_anchor : list[float], optional
|
|
73
|
+
Anchor position of the sample-linestyle legend. By default `[0.55, 1]`.
|
|
74
|
+
output_directory : str, optional
|
|
75
|
+
Plot directory relative to the preprocessing output directory.
|
|
76
|
+
By default "plots".
|
|
77
|
+
"""
|
|
78
|
+
|
|
79
|
+
num_jets_plotting: int | None = None
|
|
80
|
+
variable_labels: dict[str, str] = field(default_factory=_default_variable_labels)
|
|
81
|
+
sample_labels: dict[str, str] = field(default_factory=_default_sample_labels)
|
|
82
|
+
ylabel: str = "Normalised Number of {jets_name}"
|
|
83
|
+
atlas_first_tag: str = "Simulation Internal"
|
|
84
|
+
atlas_second_tag: str = "$\\sqrt{s} = 13/13.6\\,\\mathrm{TeV}$"
|
|
85
|
+
show_num_jets: bool = True
|
|
86
|
+
output_formats: list[str] = field(default_factory=lambda: ["pdf", "png"])
|
|
87
|
+
linestyles: list[str] = field(default_factory=lambda: ["-", "--", "-.", ":"])
|
|
88
|
+
bins: int = 50
|
|
89
|
+
norm: bool = True
|
|
90
|
+
underoverflow: bool = True
|
|
91
|
+
y_scale: float = 1.5
|
|
92
|
+
figsize: list[float] = field(default_factory=lambda: [6, 4])
|
|
93
|
+
logy: bool = True
|
|
94
|
+
legend_location: str = "upper right"
|
|
95
|
+
linestyle_legend_location: str = "upper center"
|
|
96
|
+
linestyle_legend_anchor: list[float] = field(default_factory=lambda: [0.55, 1])
|
|
97
|
+
output_directory: str = "plots"
|
|
98
|
+
|
|
99
|
+
def __post_init__(self) -> None:
|
|
100
|
+
self.variable_labels = {**_default_variable_labels(), **self.variable_labels}
|
|
101
|
+
self.sample_labels = {**_default_sample_labels(), **self.sample_labels}
|
|
102
|
+
if self.num_jets_plotting is not None and self.num_jets_plotting <= 0:
|
|
103
|
+
raise ValueError("plotting.num_jets_plotting must be a positive integer or None")
|
|
104
|
+
if not self.output_formats:
|
|
105
|
+
raise ValueError("plotting.output_formats must contain at least one format")
|
|
106
|
+
if not self.linestyles:
|
|
107
|
+
raise ValueError("plotting.linestyles must contain at least one linestyle")
|
|
108
|
+
|
|
109
|
+
def variable_label(self, variable: str) -> str:
|
|
110
|
+
"""Return the configured display label for a variable."""
|
|
111
|
+
variable_lower = variable.lower()
|
|
112
|
+
for name, label in sorted(
|
|
113
|
+
self.variable_labels.items(), key=lambda item: len(item[0]), reverse=True
|
|
114
|
+
):
|
|
115
|
+
if name.lower() in variable_lower:
|
|
116
|
+
return label
|
|
117
|
+
return variable
|
|
118
|
+
|
|
119
|
+
def sample_label(self, sample: str) -> str:
|
|
120
|
+
"""Return the configured display label for a sample."""
|
|
121
|
+
return self.sample_labels.get(sample, sample)
|
|
@@ -3,6 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
import dataclasses
|
|
4
4
|
import functools
|
|
5
5
|
import logging as log
|
|
6
|
+
import subprocess
|
|
6
7
|
from copy import copy
|
|
7
8
|
from dataclasses import dataclass
|
|
8
9
|
from pathlib import Path
|
|
@@ -19,6 +20,7 @@ from yamlinclude import YamlIncludeConstructor
|
|
|
19
20
|
|
|
20
21
|
from upp import __version__
|
|
21
22
|
from upp.classes.components import Components
|
|
23
|
+
from upp.classes.plotting_config import PlottingConfig
|
|
22
24
|
from upp.classes.resampling_config import ResamplingConfig
|
|
23
25
|
from upp.classes.reweight_config import ReweightConfig
|
|
24
26
|
from upp.classes.variable_config import VariableConfig
|
|
@@ -119,6 +121,9 @@ class PreprocessingConfig:
|
|
|
119
121
|
Skip checks for the input files. This is used for grid submission
|
|
120
122
|
skip_config_copy : bool, optional
|
|
121
123
|
Decide, if the config copying is skipped or not. By default False
|
|
124
|
+
vds_dir : Path | None, optional
|
|
125
|
+
Directory name for creation of virtual datasets. By default None
|
|
126
|
+
If none is given, virtual datasets are created next to the input ntuples
|
|
122
127
|
"""
|
|
123
128
|
|
|
124
129
|
config_path: Path
|
|
@@ -142,6 +147,7 @@ class PreprocessingConfig:
|
|
|
142
147
|
num_jets_per_output_file: int | None = None
|
|
143
148
|
skip_checks: bool = False
|
|
144
149
|
skip_config_copy: bool = False
|
|
150
|
+
vds_dir: Path | None = None
|
|
145
151
|
|
|
146
152
|
def __post_init__(self):
|
|
147
153
|
# postprocess paths
|
|
@@ -158,6 +164,9 @@ class PreprocessingConfig:
|
|
|
158
164
|
for field in dataclasses.fields(self):
|
|
159
165
|
if field.type == "Path" and field.name != "out_fname" and field.name != "base_dir":
|
|
160
166
|
setattr(self, field.name, self.get_path(Path(getattr(self, field.name))))
|
|
167
|
+
# vds_dir is optional (Path | None), so the loop above skips it; resolve it here
|
|
168
|
+
if self.vds_dir is not None:
|
|
169
|
+
self.vds_dir = self.get_path(Path(self.vds_dir))
|
|
161
170
|
if not self.ntuple_dir.exists() and not self.skip_checks:
|
|
162
171
|
raise FileNotFoundError(f"Path {self.ntuple_dir} does not exist")
|
|
163
172
|
self.components_dir = self.components_dir / self.split
|
|
@@ -202,7 +211,7 @@ class PreprocessingConfig:
|
|
|
202
211
|
self.variables = VariableConfig(
|
|
203
212
|
self.config["variables"], self.jets_name, self.is_test, selectors
|
|
204
213
|
)
|
|
205
|
-
if self.sampl_cfg is not None:
|
|
214
|
+
if self.sampl_cfg is not None and self.sampl_cfg.variables:
|
|
206
215
|
self.variables = self.variables.add_jet_vars(
|
|
207
216
|
list(self.config["resampling"]["variables"].keys()), "labels"
|
|
208
217
|
)
|
|
@@ -217,8 +226,19 @@ class PreprocessingConfig:
|
|
|
217
226
|
if "reweighting" in self.config
|
|
218
227
|
else None
|
|
219
228
|
)
|
|
229
|
+
self.plotting = PlottingConfig(**self.config.get("plotting", {}))
|
|
230
|
+
if self.plotting.num_jets_plotting is None:
|
|
231
|
+
self.plotting.num_jets_plotting = self.num_jets_estimate_plotting
|
|
232
|
+
|
|
220
233
|
# reproducibility
|
|
221
|
-
|
|
234
|
+
try:
|
|
235
|
+
self.git_hash = get_git_hash(Path(__file__).parent)
|
|
236
|
+
except (OSError, subprocess.CalledProcessError):
|
|
237
|
+
log.warning(
|
|
238
|
+
"Could not determine the git hash (is git installed and on PATH?); "
|
|
239
|
+
"using the UPP version for reproducibility metadata instead."
|
|
240
|
+
)
|
|
241
|
+
self.git_hash = None
|
|
222
242
|
if self.git_hash is None:
|
|
223
243
|
self.git_hash = __version__
|
|
224
244
|
self.config["upp_hash"] = self.git_hash
|
|
@@ -255,12 +275,36 @@ class PreprocessingConfig:
|
|
|
255
275
|
def is_test(self):
|
|
256
276
|
return self.split == "test"
|
|
257
277
|
|
|
278
|
+
@property
|
|
279
|
+
def skip_resampling(self) -> bool:
|
|
280
|
+
"""Return whether resampling is disabled (no block, or method none).
|
|
281
|
+
|
|
282
|
+
Returns
|
|
283
|
+
-------
|
|
284
|
+
bool
|
|
285
|
+
``True`` if resampling should be skipped.
|
|
286
|
+
"""
|
|
287
|
+
return self.sampl_cfg is None or self.sampl_cfg.method in (None, "none")
|
|
288
|
+
|
|
289
|
+
@property
|
|
290
|
+
def resampling_method(self) -> str:
|
|
291
|
+
"""Resampling method recorded in the output metadata ("none" if skipped).
|
|
292
|
+
|
|
293
|
+
Returns
|
|
294
|
+
-------
|
|
295
|
+
str
|
|
296
|
+
The resampling method (e.g. ``"pdf"``/``"countup"``), or ``"none"``.
|
|
297
|
+
"""
|
|
298
|
+
if self.skip_resampling:
|
|
299
|
+
return "none"
|
|
300
|
+
return self.sampl_cfg.method
|
|
301
|
+
|
|
258
302
|
@functools.cached_property
|
|
259
303
|
def global_cuts(self):
|
|
260
304
|
cuts_list = self.config["global_cuts"].get("common", [])
|
|
261
305
|
cuts_list += self.config["global_cuts"][self.split]
|
|
262
306
|
if not self.is_test and self.config.get("resampling", None) is not None:
|
|
263
|
-
for resampling_var, cfg in self.config["resampling"]
|
|
307
|
+
for resampling_var, cfg in self.config["resampling"].get("variables", {}).items():
|
|
264
308
|
cuts_list.append([resampling_var, ">", cfg["bins"][0][0]])
|
|
265
309
|
cuts_list.append([resampling_var, "<", cfg["bins"][-1][1]])
|
|
266
310
|
return Cuts.from_list(cuts_list)
|
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from dataclasses import dataclass
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
4
|
|
|
5
5
|
import numpy as np
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
@dataclass
|
|
9
9
|
class ResamplingConfig:
|
|
10
|
-
variables
|
|
11
|
-
|
|
10
|
+
# variables/target are only needed for pdf/countup resampling; optional when skipping
|
|
11
|
+
variables: dict = field(default_factory=dict)
|
|
12
|
+
target: str | None = None
|
|
12
13
|
sampling_fraction: float = 1.0
|
|
13
14
|
method: str | None = None
|
|
14
15
|
upscale_pdf: int | None = None
|
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
"""
|
|
2
|
-
Preprocessing pipeline for jet
|
|
2
|
+
Preprocessing pipeline for jet tagging.
|
|
3
3
|
|
|
4
4
|
By default all stages for the training split are run.
|
|
5
5
|
To run with only specific stages enabled, include the flag for the required stages.
|
|
6
6
|
To run without certain stages, include the corresponding negative flag.
|
|
7
7
|
|
|
8
|
-
|
|
9
|
-
|
|
8
|
+
To disable resampling, omit the `resampling` block or set `method: none`. The jets passing
|
|
9
|
+
the cuts are then written directly, capped at each component's `num_jets` (use `num_jets: -1`
|
|
10
|
+
to keep all of them). The `--no-resample` flag only skips the resampling *stage* (e.g. to
|
|
11
|
+
re-run later stages); it does not disable resampling.
|
|
10
12
|
"""
|
|
11
13
|
|
|
12
14
|
from __future__ import annotations
|
|
@@ -129,7 +131,11 @@ def parse_args(args: Any) -> argparse.Namespace:
|
|
|
129
131
|
"--reweight", "--rw", action="store_true", default=False, help="Run the reweighting stage"
|
|
130
132
|
)
|
|
131
133
|
parser.add_argument(
|
|
132
|
-
"--rw-merge",
|
|
134
|
+
"--rw-merge",
|
|
135
|
+
"--rwm",
|
|
136
|
+
action="store_true",
|
|
137
|
+
default=False,
|
|
138
|
+
help="Run the reweighting merge stage",
|
|
133
139
|
)
|
|
134
140
|
parser.add_argument(
|
|
135
141
|
"--rw-merge-idx",
|
|
@@ -137,7 +143,7 @@ def parse_args(args: Any) -> argparse.Namespace:
|
|
|
137
143
|
type=str,
|
|
138
144
|
default=None,
|
|
139
145
|
help=(
|
|
140
|
-
"
|
|
146
|
+
"Comma-separated pair of indices representing the range of output "
|
|
141
147
|
"files to create, e.g '0,10' will create files 0 to 9"
|
|
142
148
|
),
|
|
143
149
|
)
|
|
@@ -149,7 +155,7 @@ def parse_args(args: Any) -> argparse.Namespace:
|
|
|
149
155
|
parser.add_argument(
|
|
150
156
|
"--skip-sample-check",
|
|
151
157
|
action="store_true",
|
|
152
|
-
help="Skip the
|
|
158
|
+
help="Skip the initial input sample check",
|
|
153
159
|
)
|
|
154
160
|
parser.add_argument(
|
|
155
161
|
"--grid", action="store_true", help="Use when running the split stage on the grid. "
|
|
@@ -231,7 +237,7 @@ def run_pp(args: argparse.Namespace) -> None:
|
|
|
231
237
|
verbose=True,
|
|
232
238
|
)
|
|
233
239
|
|
|
234
|
-
if args.split == "train":
|
|
240
|
+
if args.split == "train" and not config.skip_resampling:
|
|
235
241
|
create_histograms(
|
|
236
242
|
config=config,
|
|
237
243
|
component_to_run=args.component,
|
|
@@ -41,8 +41,9 @@ def bin_jets(array: dict, bins: list) -> tuple[np.ndarray, np.ndarray]:
|
|
|
41
41
|
bin in which this observation falls. The representation depends on the
|
|
42
42
|
`expand_binnumbers` argument. See `Notes` for details.
|
|
43
43
|
"""
|
|
44
|
+
sample = s2u(array).astype(np.float64, copy=False)
|
|
44
45
|
hist, _, out_bins = binned_statistic_dd(
|
|
45
|
-
sample=
|
|
46
|
+
sample=sample,
|
|
46
47
|
values=None,
|
|
47
48
|
statistic="count",
|
|
48
49
|
bins=bins,
|
|
@@ -145,6 +146,9 @@ def create_histograms(
|
|
|
145
146
|
"""
|
|
146
147
|
# Setup the logger and load the variables used for resampling
|
|
147
148
|
setup_logger()
|
|
149
|
+
if config.skip_resampling:
|
|
150
|
+
log.info("Resampling is disabled - skipping histogram/PDF creation.")
|
|
151
|
+
return
|
|
148
152
|
sampl_vars = config.sampl_cfg.vars
|
|
149
153
|
|
|
150
154
|
title = " Writing PDFs "
|