umami-preprocessing 0.2.5__tar.gz → 0.2.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. umami_preprocessing-0.2.6/LICENSE +201 -0
  2. {umami_preprocessing-0.2.5 → umami_preprocessing-0.2.6}/PKG-INFO +5 -3
  3. {umami_preprocessing-0.2.5 → umami_preprocessing-0.2.6}/pyproject.toml +5 -4
  4. {umami_preprocessing-0.2.5 → umami_preprocessing-0.2.6}/umami_preprocessing.egg-info/PKG-INFO +5 -3
  5. {umami_preprocessing-0.2.5 → umami_preprocessing-0.2.6}/umami_preprocessing.egg-info/SOURCES.txt +7 -3
  6. umami_preprocessing-0.2.6/umami_preprocessing.egg-info/dependency_links.txt +1 -0
  7. umami_preprocessing-0.2.6/umami_preprocessing.egg-info/entry_points.txt +3 -0
  8. {umami_preprocessing-0.2.5 → umami_preprocessing-0.2.6}/umami_preprocessing.egg-info/requires.txt +2 -2
  9. umami_preprocessing-0.2.6/upp/__init__.py +15 -0
  10. umami_preprocessing-0.2.6/upp/classes/__init__.py +18 -0
  11. {umami_preprocessing-0.2.5 → umami_preprocessing-0.2.6}/upp/classes/components.py +7 -12
  12. {umami_preprocessing-0.2.5 → umami_preprocessing-0.2.6}/upp/classes/preprocessing_config.py +71 -39
  13. umami_preprocessing-0.2.6/upp/main.py +222 -0
  14. umami_preprocessing-0.2.6/upp/stages/__init__.py +26 -0
  15. {umami_preprocessing-0.2.5 → umami_preprocessing-0.2.6}/upp/stages/hist.py +17 -9
  16. {umami_preprocessing-0.2.5 → umami_preprocessing-0.2.6}/upp/stages/merging.py +28 -14
  17. {umami_preprocessing-0.2.5 → umami_preprocessing-0.2.6}/upp/stages/normalisation.py +1 -8
  18. {umami_preprocessing-0.2.5 → umami_preprocessing-0.2.6}/upp/stages/plot.py +8 -2
  19. {umami_preprocessing-0.2.5 → umami_preprocessing-0.2.6}/upp/stages/resampling.py +8 -17
  20. umami_preprocessing-0.2.6/upp/utils/__init__.py +12 -0
  21. umami_preprocessing-0.2.6/upp/utils/check_input_samples.py +251 -0
  22. umami_preprocessing-0.2.6/upp/utils/tools.py +10 -0
  23. umami_preprocessing-0.2.5/umami_preprocessing.egg-info/entry_points.txt +0 -2
  24. umami_preprocessing-0.2.5/upp/__init__.py +0 -5
  25. umami_preprocessing-0.2.5/upp/classes/__init__.py +0 -0
  26. umami_preprocessing-0.2.5/upp/main.py +0 -127
  27. umami_preprocessing-0.2.5/upp/stages/__init__.py +0 -0
  28. umami_preprocessing-0.2.5/upp/utils.py +0 -5
  29. /umami_preprocessing-0.2.5/umami_preprocessing.egg-info/dependency_links.txt → /umami_preprocessing-0.2.6/MANIFEST.in +0 -0
  30. {umami_preprocessing-0.2.5 → umami_preprocessing-0.2.6}/README.md +0 -0
  31. {umami_preprocessing-0.2.5 → umami_preprocessing-0.2.6}/setup.cfg +0 -0
  32. {umami_preprocessing-0.2.5 → umami_preprocessing-0.2.6}/umami_preprocessing.egg-info/top_level.txt +0 -0
  33. {umami_preprocessing-0.2.5 → umami_preprocessing-0.2.6}/upp/classes/region.py +0 -0
  34. {umami_preprocessing-0.2.5 → umami_preprocessing-0.2.6}/upp/classes/resampling_config.py +0 -0
  35. {umami_preprocessing-0.2.5 → umami_preprocessing-0.2.6}/upp/classes/variable_config.py +0 -0
  36. {umami_preprocessing-0.2.5 → umami_preprocessing-0.2.6}/upp/stages/interpolation.py +0 -0
  37. {umami_preprocessing-0.2.5/upp → umami_preprocessing-0.2.6/upp/utils}/logger.py +0 -0
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [2025] [Alexander Froch]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
@@ -1,14 +1,15 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: umami-preprocessing
3
- Version: 0.2.5
3
+ Version: 0.2.6
4
4
  Summary: Preprocessing for jet tagging
5
5
  License: MIT
6
6
  Project-URL: Homepage, https://github.com/umami-hep/umami-preprocessing
7
7
  Requires-Python: <3.12,>=3.8
8
8
  Description-Content-Type: text/markdown
9
- Requires-Dist: atlas-ftag-tools==0.2.14
9
+ License-File: LICENSE
10
+ Requires-Dist: atlas-ftag-tools==0.2.15
10
11
  Requires-Dist: dotmap==1.3.30
11
- Requires-Dist: puma-hep==0.4.9
12
+ Requires-Dist: puma-hep==0.4.10
12
13
  Requires-Dist: pyyaml-include==1.3
13
14
  Requires-Dist: PyYAML>=6.0.1
14
15
  Requires-Dist: rich==12.6.0
@@ -20,6 +21,7 @@ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
20
21
  Requires-Dist: pytest-mock==3.11.1; extra == "dev"
21
22
  Requires-Dist: pytest>=7.2.2; extra == "dev"
22
23
  Requires-Dist: ruff==0.6.2; extra == "dev"
24
+ Dynamic: license-file
23
25
 
24
26
  [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
25
27
  [![codecov](https://codecov.io/gh/umami-hep/umami-preprocessing/graph/badge.svg?token=K8MJI20UZO)](https://codecov.io/gh/umami-hep/umami-preprocessing)
@@ -7,9 +7,9 @@ readme = "README.md"
7
7
  requires-python = "<3.12,>=3.8"
8
8
 
9
9
  dependencies = [
10
- "atlas-ftag-tools==0.2.14",
10
+ "atlas-ftag-tools==0.2.15",
11
11
  "dotmap==1.3.30",
12
- "puma-hep==0.4.9",
12
+ "puma-hep==0.4.10",
13
13
  "pyyaml-include==1.3",
14
14
  "PyYAML>=6.0.1",
15
15
  "rich==12.6.0",
@@ -31,9 +31,10 @@ dev = [
31
31
 
32
32
  [project.scripts]
33
33
  preprocess = "upp.main:main"
34
+ check_input_samples = "upp.utils.check_input_samples:main"
34
35
 
35
36
  [tool.setuptools]
36
- packages = ["upp", "upp.classes", "upp.stages"]
37
+ packages = ["upp", "upp.classes", "upp.stages", "upp.utils"]
37
38
  include-package-data = true
38
39
 
39
40
  [tool.setuptools.dynamic]
@@ -45,7 +46,7 @@ build-backend = "setuptools.build_meta"
45
46
 
46
47
  [tool.ruff]
47
48
  lint.select = ["I", "E", "W", "F", "B", "UP", "ARG", "SIM", "TID", "RUF", "D2", "D3", "D4"]
48
- lint.ignore = ["D211", "D213", "RUF005"]
49
+ lint.ignore = ["RUF005"]
49
50
  line-length = 100
50
51
 
51
52
  [tool.ruff.lint.isort]
@@ -1,14 +1,15 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: umami-preprocessing
3
- Version: 0.2.5
3
+ Version: 0.2.6
4
4
  Summary: Preprocessing for jet tagging
5
5
  License: MIT
6
6
  Project-URL: Homepage, https://github.com/umami-hep/umami-preprocessing
7
7
  Requires-Python: <3.12,>=3.8
8
8
  Description-Content-Type: text/markdown
9
- Requires-Dist: atlas-ftag-tools==0.2.14
9
+ License-File: LICENSE
10
+ Requires-Dist: atlas-ftag-tools==0.2.15
10
11
  Requires-Dist: dotmap==1.3.30
11
- Requires-Dist: puma-hep==0.4.9
12
+ Requires-Dist: puma-hep==0.4.10
12
13
  Requires-Dist: pyyaml-include==1.3
13
14
  Requires-Dist: PyYAML>=6.0.1
14
15
  Requires-Dist: rich==12.6.0
@@ -20,6 +21,7 @@ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
20
21
  Requires-Dist: pytest-mock==3.11.1; extra == "dev"
21
22
  Requires-Dist: pytest>=7.2.2; extra == "dev"
22
23
  Requires-Dist: ruff==0.6.2; extra == "dev"
24
+ Dynamic: license-file
23
25
 
24
26
  [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
25
27
  [![codecov](https://codecov.io/gh/umami-hep/umami-preprocessing/graph/badge.svg?token=K8MJI20UZO)](https://codecov.io/gh/umami-hep/umami-preprocessing)
@@ -1,3 +1,5 @@
1
+ LICENSE
2
+ MANIFEST.in
1
3
  README.md
2
4
  pyproject.toml
3
5
  umami_preprocessing.egg-info/PKG-INFO
@@ -7,9 +9,7 @@ umami_preprocessing.egg-info/entry_points.txt
7
9
  umami_preprocessing.egg-info/requires.txt
8
10
  umami_preprocessing.egg-info/top_level.txt
9
11
  upp/__init__.py
10
- upp/logger.py
11
12
  upp/main.py
12
- upp/utils.py
13
13
  upp/classes/__init__.py
14
14
  upp/classes/components.py
15
15
  upp/classes/preprocessing_config.py
@@ -22,4 +22,8 @@ upp/stages/interpolation.py
22
22
  upp/stages/merging.py
23
23
  upp/stages/normalisation.py
24
24
  upp/stages/plot.py
25
- upp/stages/resampling.py
25
+ upp/stages/resampling.py
26
+ upp/utils/__init__.py
27
+ upp/utils/check_input_samples.py
28
+ upp/utils/logger.py
29
+ upp/utils/tools.py
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ check_input_samples = upp.utils.check_input_samples:main
3
+ preprocess = upp.main:main
@@ -1,6 +1,6 @@
1
- atlas-ftag-tools==0.2.14
1
+ atlas-ftag-tools==0.2.15
2
2
  dotmap==1.3.30
3
- puma-hep==0.4.9
3
+ puma-hep==0.4.10
4
4
  pyyaml-include==1.3
5
5
  PyYAML>=6.0.1
6
6
  rich==12.6.0
@@ -0,0 +1,15 @@
1
+ """UPP: Umami PreProcessing."""
2
+
3
+ from __future__ import annotations
4
+
5
+ __version__ = "v0.2.6"
6
+
7
+ from . import classes, stages, utils
8
+ from .main import run_pp
9
+
10
+ __all__ = [
11
+ "classes",
12
+ "run_pp",
13
+ "stages",
14
+ "utils",
15
+ ]
@@ -0,0 +1,18 @@
1
+ """Classes from Umami-Preprocessing."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from upp.classes.components import Component, Components
6
+ from upp.classes.preprocessing_config import PreprocessingConfig
7
+ from upp.classes.region import Region
8
+ from upp.classes.resampling_config import ResamplingConfig
9
+ from upp.classes.variable_config import VariableConfig
10
+
11
+ __all__ = [
12
+ "Component",
13
+ "Components",
14
+ "PreprocessingConfig",
15
+ "Region",
16
+ "ResamplingConfig",
17
+ "VariableConfig",
18
+ ]
@@ -25,7 +25,7 @@ class Component:
25
25
  It stores the needed information about the component and
26
26
  allow for certain features in terms of resampling.
27
27
 
28
- Parameters
28
+ Attributes
29
29
  ----------
30
30
  region : Region
31
31
  Region instance of the region for which this instance is setup
@@ -78,8 +78,10 @@ class Component:
78
78
  Batch size that is used for loading from file
79
79
  jets_name : str, optional
80
80
  Name of the group in which the jets are stored, by default "jets"
81
- fname : Path | str | list[Path | str], optional
81
+ fname : Path | str | list[Path | str] | None, optional
82
82
  Filename of the file(s) from which the jets are loaded, by default None
83
+ **kwargs
84
+ Additional kwargs passed to the H5Reader
83
85
  """
84
86
  if fname is None:
85
87
  fname = self.sample.path
@@ -292,13 +294,6 @@ class Components:
292
294
  """Components class to store and manage multiple Component instances."""
293
295
 
294
296
  def __init__(self, components: Components | list):
295
- """Init Components instance.
296
-
297
- Parameters
298
- ----------
299
- components : Components
300
- List of all Component instances that are to be managed.
301
- """
302
297
  self.components = components
303
298
 
304
299
  @classmethod
@@ -423,7 +418,7 @@ class Components:
423
418
 
424
419
  Returns
425
420
  -------
426
- list[str]
421
+ list[Label]
427
422
  List of flavours
428
423
  """
429
424
  return list(dict.fromkeys(c.flavour for c in self))
@@ -434,8 +429,8 @@ class Components:
434
429
 
435
430
  Returns
436
431
  -------
437
- list
438
- List with all the cuts
432
+ Cuts
433
+ Cuts object with all cuts
439
434
  """
440
435
  return sum((c.cuts for c in self), Cuts.from_list([]))
441
436
 
@@ -6,11 +6,11 @@ import logging as log
6
6
  from copy import copy
7
7
  from dataclasses import dataclass
8
8
  from pathlib import Path
9
- from typing import Literal
9
+ from typing import Any, Literal
10
10
 
11
11
  import yaml
12
12
  from dotmap import DotMap
13
- from ftag import Cuts
13
+ from ftag import Cuts, Extended_Flavours, Flavours
14
14
  from ftag.git_check import get_git_hash
15
15
  from ftag.labels import LabelContainer
16
16
  from ftag.track_selector import TrackSelector
@@ -21,7 +21,7 @@ from upp import __version__
21
21
  from upp.classes.components import Components
22
22
  from upp.classes.resampling_config import ResamplingConfig
23
23
  from upp.classes.variable_config import VariableConfig
24
- from upp.utils import path_append
24
+ from upp.utils.tools import path_append
25
25
 
26
26
  # support inclusion of yaml files in the config dir
27
27
  YamlIncludeConstructor.add_to_loader_class(
@@ -51,7 +51,7 @@ class PreprocessingConfig:
51
51
  ntuple_dir: h5-inputs # resolved path: /my/stuff/h5-inputs/
52
52
  ```
53
53
 
54
- Parameters
54
+ Attributes
55
55
  ----------
56
56
  config_path : Path
57
57
  Path to the config yaml file that is used. Does not need to be set in config.
@@ -82,20 +82,20 @@ class PreprocessingConfig:
82
82
  num_jets_estimate : int, optional
83
83
  Any of the further three arguments that are not specified will default to this value
84
84
  Is equal to 1_000_000 by default.
85
- num_jets_estimate_available : int, optional
85
+ num_jets_estimate_available : int | None, optional
86
86
  A subsample taken from the whole sample to estimate the number of jets after the cuts.
87
87
  Please keep this number high in order to not get a Poisson error of more than 5%.
88
88
  If time allows you can use -1 to get a precise number of jets and not just an estimate
89
89
  although it will be slow for large datasets. Is equal to num_jets_estimate by default.
90
- num_jets_estimate_hist : int, optional
90
+ num_jets_estimate_hist : int | None, optional
91
91
  Number of jets of each flavour that are used to construct histograms for probability
92
92
  density function estimation. Larger numbers give a better quality estimate of the PDFs.
93
93
  Is equal to num_jets_estimate by default.
94
- num_jets_estimate_norm : int, optional
94
+ num_jets_estimate_norm : int | None, optional
95
95
  Number of jets of each flavour that are used to estimate shifting and scaling during
96
96
  normalisation step. Larger numbers give better quality estimates.
97
97
  Is equal to num_jets_estimate by default.
98
- num_jets_estimate_plotting : int, optional
98
+ num_jets_estimate_plotting : int | None, optional
99
99
  Number of jets of each flavour used for plotting the initial and the final resampling
100
100
  variable distributions. Larger numbers give a better estimate of the full distributions.
101
101
  Is equal to num_jets_estimate by default.
@@ -103,6 +103,19 @@ class PreprocessingConfig:
103
103
  Merge the test samples of the different processes into one file. By default False.
104
104
  jets_name : str, optional
105
105
  Name of the jets dataset in the input file. By default "jets".
106
+ flavour_config : Path | None, optional
107
+ Flavour config yaml file which is to be used. By default None
108
+ flavour_category : str, optional
109
+ Flavour categories that are to be used. By default, the "standard" (non-extended)
110
+ labels are loaded. The extended labels can be used by setting this value to "extended".
111
+ By default "standard". To use this option, flavour_config must be None.
112
+ num_jets_per_output_file : int | None, optional
113
+ Number of jets per final output file. If the number of total jets is larger
114
+ than this number, the final h5 output files are split into multiple smaller
115
+ files with this number of jets per file. By default None which produces one
116
+ huge output file.
117
+ skip_config_copy : bool, optional
118
+ Decide, if the config copying is skipped or not. By default False
106
119
  """
107
120
 
108
121
  config_path: Path
@@ -122,7 +135,9 @@ class PreprocessingConfig:
122
135
  merge_test_samples: bool = False
123
136
  jets_name: str = "jets"
124
137
  flavour_config: Path | None = None
138
+ flavour_category: str = "standard"
125
139
  num_jets_per_output_file: int | None = None
140
+ skip_config_copy: bool = False
126
141
 
127
142
  def __post_init__(self):
128
143
  # postprocess paths
@@ -143,7 +158,25 @@ class PreprocessingConfig:
143
158
  raise FileNotFoundError(f"Path {self.ntuple_dir} does not exist")
144
159
  self.components_dir = self.components_dir / self.split
145
160
  self.out_fname = self.out_dir / path_append(self.out_fname, self.split)
146
- self.flavour_cont = LabelContainer.from_yaml(self.flavour_config)
161
+
162
+ # Define the content of the flavour label container
163
+ if self.flavour_config:
164
+ self.flavour_cont = LabelContainer.from_yaml(
165
+ yaml_path=self.flavour_config,
166
+ )
167
+
168
+ elif self.flavour_category == "standard":
169
+ self.flavour_cont = Flavours
170
+
171
+ elif self.flavour_category == "extended":
172
+ self.flavour_cont = Extended_Flavours
173
+
174
+ else:
175
+ raise ValueError(
176
+ f"flavour_category {self.flavour_category} is not supported in the default "
177
+ "flavours! If you want to use your own flavour config yaml file, please "
178
+ "provide flavour_config!"
179
+ )
147
180
 
148
181
  # configure classes
149
182
  sampl_cfg = copy(self.config["resampling"])
@@ -177,15 +210,22 @@ class PreprocessingConfig:
177
210
  self.config["upp_hash"] = self.git_hash
178
211
 
179
212
  # copy config
180
- self.copy_config()
213
+ if not self.skip_config_copy:
214
+ self.copy_config()
181
215
 
182
216
  @classmethod
183
- def from_file(cls, config_path: Path, split: Split):
217
+ def from_file(cls, config_path: Path, split: Split, skip_config_copy: bool = False):
184
218
  if not config_path.exists():
185
219
  raise FileNotFoundError(f"{config_path} does not exist - check your --config arg")
186
220
  with open(config_path) as file:
187
221
  config = yaml.safe_load(file)
188
- return cls(config_path, split, config, **config["global"])
222
+ return cls(
223
+ config_path=config_path,
224
+ split=split,
225
+ config=config,
226
+ skip_config_copy=skip_config_copy,
227
+ **config["global"],
228
+ )
189
229
 
190
230
  def get_path(self, path: Path):
191
231
  return path if path.is_absolute() else (self.base_dir / path).absolute()
@@ -204,24 +244,19 @@ class PreprocessingConfig:
204
244
  cuts_list.append([resampling_var, "<", cfg["bins"][-1][1]])
205
245
  return Cuts.from_list(cuts_list)
206
246
 
207
- def copy_config(self, suffix=None, out_dir=None):
208
- """
209
- Copy the configuration file to a new location with an optional suffix and output directory.
247
+ def copy_config(self, suffix: str | None = None, out_dir: str | Path | None = None) -> None:
248
+ """Copy the configuration file to a new location with an optional suffix and out directory.
210
249
 
211
250
  Parameters
212
251
  ----------
213
- suffix : str or None, optional
252
+ suffix : str | None, optional
214
253
  A suffix to append to the configuration file name. If None, the current
215
254
  `self.split` value will be used as the suffix (default is None).
216
255
 
217
- out_dir : str or None, optional
256
+ out_dir : str | Path | None, optional
218
257
  The output directory where the copied configuration file will be saved.
219
258
  If None, the current `self.out_dir` value will be used as the output directory
220
259
  (default is None).
221
-
222
- Returns
223
- -------
224
- None
225
260
  """
226
261
  if suffix is None:
227
262
  suffix = self.split
@@ -233,11 +268,7 @@ class PreprocessingConfig:
233
268
  with open(copy_config_path, "w") as file:
234
269
  yaml.dump(self.config, file, sort_keys=False)
235
270
 
236
- # following aliases and functins are needed to mimic the umami config structure and behaviour
237
- # so that --scaling --write and traing steps from umami are compatible with this config
238
- copy_to_out_dir = copy_config
239
-
240
- def get_umami_general(self):
271
+ def get_umami_general(self) -> DotMap:
241
272
  """
242
273
  Return the arguments to be fed into GeneralSettings class in umami.
243
274
 
@@ -260,13 +291,12 @@ class PreprocessingConfig:
260
291
  _dynamic=False,
261
292
  )
262
293
 
263
- def mimic_umami_config(self, general):
264
- """
265
- Make the config mimic the umami config structure and behaviour.
294
+ def mimic_umami_config(self, general: Any) -> PreprocessingConfig:
295
+ """Make the config mimic the umami config structure and behaviour.
266
296
 
267
297
  Parameters
268
298
  ----------
269
- general : umami.preprocessing_tools.configuration.GeneralSettings
299
+ general : Any
270
300
  first initialised in umami.preprocessing_tools.configuration.Configuration
271
301
  class in umami using get_umami_general() for arguments
272
302
  then feed it into mimic_umami_config() to get the rest of the config
@@ -274,7 +304,8 @@ class PreprocessingConfig:
274
304
 
275
305
  Returns
276
306
  -------
277
- self : upp.classes.preprocessing_config.PreprocessingConfig
307
+ PreprocessingConfig
308
+ This PreprocessingConfig instance, extended to mimic the umami config
278
309
  """
279
310
  self.general = general
280
311
  self.sampling = DotMap(self.config["umami"]["sampling"], _dynamic=False)
@@ -289,9 +320,8 @@ class PreprocessingConfig:
289
320
  self.general.convert_to_tfrecord = self.config["umami"]["convert_to_tfrecord"]
290
321
  return self
291
322
 
292
- def get_file_name(self, option, **_):
293
- """
294
- Mimics the 'get_file_name()' function in PreprocessingConfig class in umami.
323
+ def get_file_name(self, option: str) -> Path | str:
324
+ """Mimics the 'get_file_name()' function in PreprocessingConfig class in umami.
295
325
 
296
326
  Parameters
297
327
  ----------
@@ -302,18 +332,15 @@ class PreprocessingConfig:
302
332
  original output file name with '_resampled_scaled_shuffled' appended.
303
333
  This option is used to create a new file name.
304
334
 
305
- use_val : bool, optional
306
- Currently not in use (default is False).
307
-
308
335
  Returns
309
336
  -------
310
- str
337
+ Path | str
311
338
  The resulting file name based on the specified 'option'.
312
339
 
313
340
  Raises
314
341
  ------
315
342
  ValueError
316
- If 'option' is not one of the recognized options.
343
+ If the option value is not supported
317
344
  """
318
345
  if option == "resampled":
319
346
  return self.out_fname
@@ -325,3 +352,8 @@ class PreprocessingConfig:
325
352
  + "_resampled_scaled_shuffled"
326
353
  + self.out_fname.suffix
327
354
  )
355
+ else:
356
+ raise ValueError(
357
+ f"Option value {option} is not supported! "
358
+ "Only resampled and resampled_scaled_shuffled are."
359
+ )