uproot-custom 1.1.3.dev1__tar.gz → 1.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/PKG-INFO +1 -1
  2. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/cpp/include/uproot-custom/uproot-custom.hh +2 -0
  3. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/cpp/src/uproot-custom.cc +72 -13
  4. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/example/gen-demo-data/include/TComplicatedSTL.hh +45 -18
  5. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/example/gen-demo-data/src/main.cc +1 -1
  6. uproot_custom-1.3.0/tests/test-data-2.root +0 -0
  7. uproot_custom-1.3.0/tests/test_AsCustom.py +19 -0
  8. uproot_custom-1.3.0/tests/test_AsGroupedMap.py +29 -0
  9. uproot_custom-1.3.0/uproot_custom/AsBinary.py +95 -0
  10. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/uproot_custom/AsCustom.py +21 -44
  11. uproot_custom-1.3.0/uproot_custom/AsGroupedMap.py +44 -0
  12. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/uproot_custom/__init__.py +6 -2
  13. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/uproot_custom/_version.py +3 -3
  14. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/uproot_custom/readers.py +66 -57
  15. uproot_custom-1.3.0/uproot_custom/utils.py +63 -0
  16. uproot_custom-1.1.3.dev1/tests/test-data-2.root +0 -0
  17. uproot_custom-1.1.3.dev1/tests/test_AsCustom.py +0 -32
  18. uproot_custom-1.1.3.dev1/uproot_custom/AsBinary.py +0 -62
  19. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/.clang-format +0 -0
  20. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/.github/workflows/build-wheels.yml +0 -0
  21. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/.github/workflows/python-publish.yml +0 -0
  22. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/.github/workflows/run-pytest.yml +0 -0
  23. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/.gitignore +0 -0
  24. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/CHANGELOG.md +0 -0
  25. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/LICENSE +0 -0
  26. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/README.md +0 -0
  27. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/cpp/CMakeLists.txt +0 -0
  28. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/cpp/share/cmake/uproot-customConfig.cmake +0 -0
  29. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/example/README.md +0 -0
  30. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/example/cpp/CMakeLists.txt +0 -0
  31. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/example/cpp/my_reader.cc +0 -0
  32. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/example/gen-demo-data/CMakeLists.txt +0 -0
  33. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/example/gen-demo-data/include/LinkDef.h +0 -0
  34. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/example/gen-demo-data/include/TOverrideStreamer.hh +0 -0
  35. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/example/gen-demo-data/src/TOverrideStreamer.cc +0 -0
  36. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/example/my_reader/OverrideStreamerReader.py +0 -0
  37. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/example/my_reader/__init__.py +0 -0
  38. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/example/pyproject.toml +0 -0
  39. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/example/read-data.py +0 -0
  40. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/pyproject.toml +0 -0
  41. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/tests/test-data-1.root +0 -0
  42. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/tests/test_downstream_build.py +0 -0
  43. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/tests/test_downstream_build_pyproject.toml +0 -0
  44. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/uproot_custom/cpp.pyi +0 -0
  45. {uproot_custom-1.1.3.dev1 → uproot_custom-1.3.0}/uproot_custom/share/cmake/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: uproot-custom
3
- Version: 1.1.3.dev1
3
+ Version: 1.3.0
4
4
  Summary: uproot extension for reading custom classes
5
5
  Author-Email: Mingrun Li <mrli@ihep.ac.cn>
6
6
  Classifier: Development Status :: 3 - Alpha
@@ -148,7 +148,9 @@ namespace uproot {
148
148
  if ( fBits & ( kIsReferenced ) ) skip( 2 ); // pidf
149
149
  }
150
150
 
151
+ const uint8_t* get_data() const { return m_data; }
151
152
  const uint8_t* get_cursor() const { return m_cursor; }
153
+ const uint32_t* get_offsets() const { return m_offsets; }
152
154
  const uint64_t entries() const { return m_entries; }
153
155
 
154
156
  private:
@@ -378,39 +378,98 @@ namespace uproot {
378
378
 
379
379
  class CArrayReader : public IElementReader {
380
380
  public:
381
- CArrayReader( std::string name, bool is_obj, const uint32_t flat_size,
381
+ CArrayReader( std::string name, bool is_obj, bool is_stdmap, const int64_t flat_size,
382
382
  SharedReader element_reader )
383
383
  : IElementReader( name )
384
384
  , m_is_obj( is_obj )
385
+ , m_is_stdmap( is_stdmap )
385
386
  , m_flat_size( flat_size )
387
+ , m_offsets( std::make_shared<std::vector<uint32_t>>( 1, 0 ) )
386
388
  , m_element_reader( element_reader ) {}
387
389
 
388
390
  void read( BinaryBuffer& buffer ) override {
391
+
389
392
  PRINT_MSG( "CArrayReader::read() for " + m_name +
390
393
  " with flat_size = " + std::to_string( m_flat_size ) +
391
394
  ", is_obj = " + std::to_string( m_is_obj ) );
392
395
  PRINT_BUFFER( buffer );
393
- if ( m_is_obj )
396
+
397
+ if ( m_flat_size > 0 )
394
398
  {
395
- buffer.read_fNBytes();
396
- buffer.read_fVersion();
399
+ if ( m_is_obj )
400
+ {
401
+ buffer.read_fNBytes();
402
+ buffer.read_fVersion();
403
+ if ( m_is_stdmap ) buffer.skip( 6 );
404
+ }
405
+
406
+ for ( auto i = 0; i < m_flat_size; i++ )
407
+ {
408
+ PRINT_MSG( "CArrayReader::read() reading element " + std::to_string( i ) );
409
+ PRINT_BUFFER( buffer );
410
+ m_element_reader->read( buffer );
411
+ }
412
+ PRINT_MSG( "" );
413
+ PRINT_MSG( "" );
397
414
  }
398
- for ( auto i = 0; i < m_flat_size; i++ )
415
+
416
+ else
399
417
  {
400
- PRINT_MSG( "CArrayReader::read() reading element " + std::to_string( i ) );
401
- PRINT_BUFFER( buffer );
402
- m_element_reader->read( buffer );
418
+ // get end-position
419
+ auto n_entries = buffer.entries();
420
+ auto start_pos = buffer.get_data();
421
+ auto entry_offsets = buffer.get_offsets();
422
+ auto cursor_pos = buffer.get_cursor();
423
+ auto entry_end = std::find_if( entry_offsets, entry_offsets + n_entries + 1,
424
+ [start_pos, cursor_pos]( uint32_t offset ) {
425
+ return start_pos + offset > cursor_pos;
426
+ } );
427
+
428
+ PRINT_MSG( "CArrayReader::read() cursor_pos = " +
429
+ std::to_string( cursor_pos - start_pos ) +
430
+ "entry_end = " + std::to_string( *entry_end ) );
431
+
432
+ if ( m_is_obj )
433
+ {
434
+ buffer.read_fNBytes();
435
+ buffer.read_fVersion();
436
+ // if ( m_is_stdmap ) buffer.skip( 6 ); // Even std::map has no 6 bytes here.
437
+ }
438
+
439
+ uint32_t count = 0;
440
+ while ( buffer.get_cursor() < start_pos + *entry_end )
441
+ {
442
+
443
+ PRINT_MSG( "CArrayReader::read() reading element " +
444
+ std::to_string( count ) );
445
+ PRINT_BUFFER( buffer );
446
+ m_element_reader->read( buffer );
447
+ count += 1;
448
+ }
449
+
450
+ PRINT_MSG( "" );
451
+ PRINT_MSG( "" );
452
+
453
+ m_offsets->push_back( m_offsets->back() + count );
403
454
  }
404
- PRINT_MSG( "" );
405
- PRINT_MSG( "" );
406
455
  }
407
456
 
408
- py::object data() const override { return m_element_reader->data(); }
457
+ py::object data() const override {
458
+ if ( m_flat_size > 0 ) return m_element_reader->data();
459
+ else
460
+ {
461
+ auto offsets_array = make_array( m_offsets );
462
+ auto elements_data = m_element_reader->data();
463
+ return py::make_tuple( offsets_array, elements_data );
464
+ }
465
+ }
409
466
 
410
467
  private:
411
468
  bool m_is_obj;
412
- const uint32_t m_flat_size;
469
+ bool m_is_stdmap;
470
+ const int64_t m_flat_size;
413
471
  SharedReader m_element_reader;
472
+ SharedVector<uint32_t> m_offsets;
414
473
  };
415
474
 
416
475
  /*
@@ -481,7 +540,7 @@ namespace uproot {
481
540
  register_reader<BaseObjectReader, std::vector<SharedReader>>( m, "BaseObjectReader" );
482
541
  register_reader<ObjectHeaderReader, std::vector<SharedReader>>( m,
483
542
  "ObjectHeaderReader" );
484
- register_reader<CArrayReader, bool, uint32_t, SharedReader>( m, "CArrayReader" );
543
+ register_reader<CArrayReader, bool, bool, int64_t, SharedReader>( m, "CArrayReader" );
485
544
  register_reader<EmptyReader>( m, "EmptyReader" );
486
545
  }
487
546
 
@@ -17,13 +17,13 @@ using namespace std;
17
17
  class TComplicatedSTL : public TObject {
18
18
 
19
19
  public:
20
- TComplicatedSTL() : TObject() {}
21
-
22
- void fill() {
20
+ TComplicatedSTL() : TObject() {
23
21
  // Initialize 1 basic type element
24
22
  for ( int i = 0; i < 5; i++ )
25
23
  {
24
+
26
25
  vector<int> vec_int;
26
+ map<int, double> map_int_double;
27
27
  list<int> list_int;
28
28
  set<int> set_int;
29
29
  unordered_set<int> uset_int;
@@ -33,10 +33,21 @@ class TComplicatedSTL : public TObject {
33
33
  list_int.push_back( 10 * i + j );
34
34
  set_int.insert( 10 * i + j );
35
35
  uset_int.insert( 10 * i + j );
36
+ map_int_double[10 * i + j] = 0.1 * ( 10 * i + j );
36
37
  }
37
38
 
38
- // sequence like containers
39
+ // std::array
40
+ m_arr_int[i] = 100 + i;
39
41
  m_arr_vec_int[i] = vec_int;
42
+ m_arr_str[i] = "Hello, " + to_string( i );
43
+
44
+ // c-style array
45
+ m_carr_int[i] = 10 + i;
46
+ m_carr_vec_int[i] = vec_int;
47
+ m_carr_map_int_double[i] = map_int_double;
48
+ m_carr_str[i] = "World, " + to_string( i );
49
+
50
+ // sequence like containers
40
51
  m_vec_list_int.push_back( list_int );
41
52
  m_list_set_int.push_back( set_int );
42
53
  m_vec_uset_int.push_back( uset_int );
@@ -47,23 +58,40 @@ class TComplicatedSTL : public TObject {
47
58
  m_map_set_int[i] = set_int;
48
59
  m_umap_uset_int[i] = uset_int;
49
60
 
50
- /* ------------------------------------ */
51
- // mapping<sequence<object>> like containers
52
- vector<TComplicatedSTL> vec_obj;
53
- list<TComplicatedSTL*> list_objptr;
54
- for ( int j = 0; j < 3; j++ )
61
+ // nested containers
62
+ vector<list<set<int>>> vec_list_set_int;
63
+ for ( int j = 0; j < 2; j++ )
55
64
  {
56
- vec_obj.emplace_back();
57
- list_objptr.push_back( new TComplicatedSTL() );
65
+ list<set<int>> list_set_int;
66
+ for ( int k = 0; k < 2; k++ )
67
+ {
68
+ set<int> set_int;
69
+ for ( int l = 0; l < 3; l++ )
70
+ { set_int.insert( 100 * i + 10 * j + 2 * k + l ); }
71
+ list_set_int.push_back( set_int );
72
+ }
73
+ vec_list_set_int.push_back( list_set_int );
58
74
  }
59
- m_map_vec_obj[i] = vec_obj;
60
- m_map_list_objptr[i] = list_objptr;
75
+ m_map_vec_list_set_int[i] = vec_list_set_int;
61
76
  }
62
77
  }
63
78
 
64
79
  private:
65
- // sequence like containers
80
+ int m_marker{ 114514 }; // just a marker
81
+
82
+ // c-style array
83
+ int m_carr_int[5]{};
84
+ vector<int> m_carr_vec_int[5]{};
85
+ map<int, double> m_carr_map_int_double[5]{};
86
+ string m_carr_str[5]{};
87
+
88
+ // std::array
89
+ array<int, 5> m_arr_int;
66
90
  array<vector<int>, 5> m_arr_vec_int;
91
+ array<string, 5> m_arr_str;
92
+ // ROOT-6.32.02 does not support std::array of map
93
+
94
+ // sequence like containers
67
95
  vector<list<int>> m_vec_list_int;
68
96
  list<set<int>> m_list_set_int;
69
97
  vector<unordered_set<int>> m_vec_uset_int;
@@ -74,9 +102,8 @@ class TComplicatedSTL : public TObject {
74
102
  map<int, set<int>> m_map_set_int;
75
103
  unordered_map<int, unordered_set<int>> m_umap_uset_int;
76
104
 
77
- // mapping<sequence<object>> like containers
78
- map<int, vector<TComplicatedSTL>> m_map_vec_obj;
79
- map<int, list<TComplicatedSTL*>> m_map_list_objptr;
105
+ // nested containers
106
+ map<int, vector<list<set<int>>>> m_map_vec_list_set_int;
80
107
 
81
108
  ClassDef( TComplicatedSTL, 1 );
82
- };
109
+ };
@@ -18,7 +18,7 @@ int main() {
18
18
  {
19
19
  ovrd_steamer = TOverrideStreamer( i );
20
20
  complicated_stl = TComplicatedSTL();
21
- complicated_stl.fill();
21
+
22
22
  t.Fill();
23
23
  }
24
24
 
@@ -0,0 +1,19 @@
1
+ from pathlib import Path
2
+
3
+ import uproot
4
+ import uproot_custom
5
+
6
+
7
+ uproot_custom.AsCustom.target_branches |= {
8
+ "/my_tree:my_obj/m_carr_vec_int[3]",
9
+ "/my_tree:my_obj/m_int",
10
+ "/my_tree:my_obj/m_carr_tstring[3]",
11
+ "/my_tree:my_obj/m_carr2d_vec_int[2][3]",
12
+ "/my_tree:my_obj/m_carr2d_tstring[2][3]",
13
+ }
14
+
15
+
16
+ def test_AsCustom():
17
+ f = uproot.open(Path(__file__).parent / "test-data-1.root")
18
+ tree = f["my_tree"]
19
+ tree.arrays()
@@ -0,0 +1,29 @@
1
+ from pathlib import Path
2
+
3
+ import uproot
4
+
5
+ import uproot_custom
6
+
7
+ uproot_custom.AsCustom.target_branches |= {
8
+ "/my_tree:complicated_stl/m_arr_str[5]",
9
+ "/my_tree:complicated_stl/m_arr_vec_int[5]",
10
+ "/my_tree:complicated_stl/m_carr_vec_int[5]",
11
+ "/my_tree:complicated_stl/m_carr_str[5]",
12
+ "/my_tree:complicated_stl/m_vec_uset_int",
13
+ "/my_tree:complicated_stl/m_arr_map_int_double[5]",
14
+ "/my_tree:complicated_stl/m_carr_map_int_double[5]",
15
+ }
16
+
17
+ uproot_custom.AsGroupedMap.target_branches |= {
18
+ "/my_tree:complicated_stl/m_map_vec_int/m_map_vec_int.second",
19
+ "/my_tree:complicated_stl/m_umap_list_int/m_umap_list_int.second",
20
+ "/my_tree:complicated_stl/m_map_set_int/m_map_set_int.second",
21
+ "/my_tree:complicated_stl/m_umap_uset_int/m_umap_uset_int.second",
22
+ "/my_tree:complicated_stl/m_map_vec_list_set_int/m_map_vec_list_set_int.second",
23
+ }
24
+
25
+
26
+ def test_AsGroupedMap():
27
+ f = uproot.open(Path(__file__).parent / "test-data-2.root")
28
+ tree = f["my_tree/complicated_stl"]
29
+ tree.arrays()
@@ -0,0 +1,95 @@
1
+ from __future__ import annotations
2
+
3
+ import awkward
4
+ import numpy
5
+ import uproot
6
+ import uproot.extras
7
+ import uproot.interpretation
8
+
9
+
10
+ class AsBinary(uproot.interpretation.Interpretation):
11
+ """
12
+ Return binary data of the ``TBasket``. Pass an instance of this class
13
+ to :ref:`uproot.behaviors.TBranch.TBranch.array` like this:
14
+
15
+ .. code-block:: python
16
+ binary_data = branch.array(interpretation=AsBinary())
17
+
18
+ """
19
+
20
+ @property
21
+ def cache_key(self) -> str:
22
+ return id(self)
23
+
24
+ def basket_array(
25
+ self,
26
+ data,
27
+ byte_offsets,
28
+ basket,
29
+ branch,
30
+ context,
31
+ cursor_offset,
32
+ library,
33
+ interp_options,
34
+ ):
35
+ if byte_offsets is not None:
36
+ counts = byte_offsets[1:] - byte_offsets[:-1]
37
+ else:
38
+ counts = None
39
+
40
+ if library.name == "ak":
41
+ awkward = uproot.extras.awkward()
42
+ if counts is not None:
43
+ return awkward.unflatten(data, counts)
44
+ else:
45
+ fSize = branch.streamer.member("fSize")
46
+ return awkward.from_numpy(data.reshape(-1, fSize))
47
+
48
+ elif library.name == "np":
49
+ if counts is not None:
50
+ assert (
51
+ numpy.unique(counts[1:] - counts[:-1]).size == 1
52
+ ), "The byte offsets must be uniform for NumPy arrays."
53
+
54
+ bytes_per_event = counts[0]
55
+ return data.reshape(-1, bytes_per_event)
56
+ else:
57
+ fSize = branch.streamer.member("fSize")
58
+ return data.reshape(-1, fSize).view(">u1")
59
+ else:
60
+ raise ValueError(
61
+ f"Unsupported library: {library.name}, can only use 'ak' or 'np'."
62
+ )
63
+
64
+ def final_array(
65
+ self,
66
+ basket_arrays,
67
+ entry_start,
68
+ entry_stop,
69
+ entry_offsets,
70
+ library,
71
+ branch,
72
+ options,
73
+ ):
74
+ basket_entry_starts = numpy.array(entry_offsets[:-1])
75
+ basket_entry_stops = numpy.array(entry_offsets[1:])
76
+
77
+ basket_start_idx = numpy.where(basket_entry_starts <= entry_start)[0].max()
78
+ basket_end_idx = numpy.where(basket_entry_stops >= entry_stop)[0].min()
79
+
80
+ arr_to_concat = [basket_arrays[i] for i in range(basket_start_idx, basket_end_idx + 1)]
81
+
82
+ relative_entry_start = entry_start - basket_entry_starts[basket_start_idx]
83
+ relative_entry_stop = entry_stop - basket_entry_starts[basket_start_idx]
84
+
85
+ if library.name == "ak":
86
+ awkward = uproot.extras.awkward()
87
+ return awkward.concatenate(arr_to_concat)[relative_entry_start:relative_entry_stop]
88
+
89
+ elif library.name == "np":
90
+ return numpy.concatenate(arr_to_concat)[relative_entry_start:relative_entry_stop]
91
+
92
+ else:
93
+ raise ValueError(
94
+ f"Unsupported library: {library.name}, can only use 'ak' or 'np'."
95
+ )
@@ -1,44 +1,13 @@
1
1
  from __future__ import annotations
2
2
 
3
- import re
4
-
3
+ import awkward as ak
5
4
  import numpy as np
6
5
  import uproot
7
6
  import uproot.behaviors.TBranch
8
7
  import uproot.interpretation
9
8
 
10
9
  from uproot_custom.readers import read_branch
11
-
12
-
13
- def regularize_object_path(object_path: str) -> str:
14
- return re.sub(r";[0-9]+", r"", object_path)
15
-
16
-
17
- _title_has_dims = re.compile(r"^([^\[\]]*)(\[[^\[\]]+\])+")
18
- _item_dim_pattern = re.compile(r"\[([1-9][0-9]*)\]")
19
- _item_any_pattern = re.compile(r"\[(.*)\]")
20
-
21
-
22
- def get_dims_from_branch(
23
- branch: uproot.behaviors.TBranch.TBranch,
24
- ) -> tuple[tuple[int, ...], bool]:
25
- leaf = branch.member("fLeaves")[0]
26
- title = leaf.member("fTitle")
27
-
28
- dims, is_jagged = (), False
29
-
30
- m = _title_has_dims.match(title)
31
- if m is not None:
32
- dims = tuple(int(x) for x in re.findall(_item_dim_pattern, title))
33
- if dims == () and leaf.member("fLen") > 1:
34
- dims = (leaf.member("fLen"),)
35
-
36
- if any(
37
- _item_dim_pattern.match(x) is None for x in re.findall(_item_any_pattern, title)
38
- ):
39
- is_jagged = True
40
-
41
- return dims, is_jagged
10
+ from uproot_custom.utils import get_dims_from_branch, regularize_object_path
42
11
 
43
12
 
44
13
  class AsCustom(uproot.interpretation.Interpretation):
@@ -65,6 +34,7 @@ class AsCustom(uproot.interpretation.Interpretation):
65
34
  self._branch = branch
66
35
  self._context = context
67
36
  self._simplify = simplify
37
+ self._typename = None
68
38
 
69
39
  # simplify streamer information
70
40
  self.all_streamer_info: dict[str, list[dict]] = {}
@@ -100,12 +70,14 @@ class AsCustom(uproot.interpretation.Interpretation):
100
70
  """
101
71
  The name of the type of the interpretation.
102
72
  """
103
- dims, is_jagged = get_dims_from_branch(self._branch)
104
- typename = self._branch.streamer.typename
105
- if dims:
106
- for i in dims:
107
- typename += f"[{i}]"
108
- return typename
73
+ if self._typename is None:
74
+ dims, is_jagged = get_dims_from_branch(self._branch)
75
+ typename = self._branch.streamer.typename
76
+ if dims:
77
+ for i in dims:
78
+ typename += f"[{i}]"
79
+ self._typename = typename
80
+ return self._typename
109
81
 
110
82
  @property
111
83
  def cache_key(self) -> str:
@@ -133,9 +105,6 @@ class AsCustom(uproot.interpretation.Interpretation):
133
105
  """
134
106
  Concatenate the arrays from the baskets and return the final array.
135
107
  """
136
-
137
- awkward = uproot.extras.awkward()
138
-
139
108
  basket_entry_starts = np.array(entry_offsets[:-1])
140
109
  basket_entry_stops = np.array(entry_offsets[1:])
141
110
 
@@ -143,7 +112,7 @@ class AsCustom(uproot.interpretation.Interpretation):
143
112
  basket_end_idx = np.where(basket_entry_stops >= entry_stop)[0].min()
144
113
 
145
114
  arr_to_concat = [basket_arrays[i] for i in range(basket_start_idx, basket_end_idx + 1)]
146
- tot_array = awkward.concatenate(arr_to_concat)
115
+ tot_array = ak.concatenate(arr_to_concat)
147
116
 
148
117
  relative_entry_start = entry_start - basket_entry_starts[basket_start_idx]
149
118
  relative_entry_stop = entry_stop - basket_entry_starts[basket_start_idx]
@@ -165,10 +134,18 @@ class AsCustom(uproot.interpretation.Interpretation):
165
134
 
166
135
  full_branch_path = regularize_object_path(branch.object_path)
167
136
 
137
+ if branch.streamer is None:
138
+ cls_streamer_info = {
139
+ "fName": branch.name,
140
+ "fTypeName": self.typename,
141
+ }
142
+ else:
143
+ cls_streamer_info = branch.streamer.all_members
144
+
168
145
  return read_branch(
169
146
  data,
170
147
  byte_offsets,
171
- branch.streamer.all_members,
148
+ cls_streamer_info,
172
149
  self.all_streamer_info,
173
150
  full_branch_path,
174
151
  )
@@ -0,0 +1,44 @@
1
+ from __future__ import annotations
2
+
3
+ import uproot
4
+ import uproot.behaviors.TBranch
5
+ import uproot.behaviors.TBranchElement
6
+ import uproot.interpretation
7
+
8
+ from uproot_custom.AsCustom import AsCustom
9
+ from uproot_custom.utils import get_map_key_val_typenames
10
+
11
+
12
+ class AsGroupedMap(AsCustom):
13
+ target_branches: set[str] = set()
14
+
15
+ def __init__(
16
+ self,
17
+ branch: uproot.behaviors.TBranch.TBranch,
18
+ context: dict,
19
+ simplify: bool,
20
+ ):
21
+ AsCustom.__init__(self, branch, context, simplify)
22
+
23
+ # 1:vector, 2:list, 3:deque, 4:map, 5:set, 6:multimap, 7:multiset, 12:unordered_map
24
+ stl_type = branch.parent.streamer.stl_type
25
+ assert stl_type in (
26
+ 4,
27
+ 6,
28
+ 12,
29
+ ), f"Only map and multimap are supported for STL grouped branches, but got {stl_type}."
30
+
31
+ key_type_name, val_type_name = get_map_key_val_typenames(
32
+ branch.parent.streamer.typename
33
+ )
34
+
35
+ if branch == branch.parent.branches[0]:
36
+ self._typename = key_type_name + "[]"
37
+
38
+ elif branch == branch.parent.branches[1]:
39
+ self._typename = val_type_name + "[]"
40
+
41
+ else:
42
+ raise ValueError(
43
+ f"Branch {branch.name} not found in its parent branch {branch.parent.name}."
44
+ )
@@ -3,7 +3,8 @@ import uproot.behaviors.TBranch
3
3
  import uproot.interpretation.identify
4
4
 
5
5
  from uproot_custom.AsBinary import AsBinary
6
- from uproot_custom.AsCustom import AsCustom, regularize_object_path
6
+ from uproot_custom.AsCustom import AsCustom
7
+ from uproot_custom.AsGroupedMap import AsGroupedMap
7
8
  from uproot_custom.readers import (
8
9
  BaseObjectReader,
9
10
  BaseReader,
@@ -22,11 +23,11 @@ from uproot_custom.readers import (
22
23
  reconstruct_array,
23
24
  registered_readers,
24
25
  )
26
+ from uproot_custom.utils import regularize_object_path
25
27
 
26
28
  ##########################################################################################
27
29
  # Wrappers
28
30
  ##########################################################################################
29
- _is_TBranchElement_branches_wrapped = False
30
31
  _is_uproot_interpretation_of_wrapped = False
31
32
 
32
33
  _uproot_interpretation_of = uproot.interpretation.identify.interpretation_of
@@ -38,6 +39,9 @@ def custom_interpretation_of(
38
39
  if not hasattr(branch, "parent"):
39
40
  return _uproot_interpretation_of(branch, context, simplify)
40
41
 
42
+ if AsGroupedMap.match_branch(branch, context, simplify):
43
+ return AsGroupedMap(branch, context, simplify)
44
+
41
45
  if AsCustom.match_branch(branch, context, simplify):
42
46
  return AsCustom(branch, context, simplify)
43
47
 
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '1.1.3.dev1'
32
- __version_tuple__ = version_tuple = (1, 1, 3, 'dev1')
31
+ __version__ = version = '1.3.0'
32
+ __version_tuple__ = version_tuple = (1, 3, 0)
33
33
 
34
- __commit_id__ = commit_id = 'gab4d2fe79'
34
+ __commit_id__ = commit_id = 'ga387b49b1'
@@ -1,6 +1,5 @@
1
1
  from __future__ import annotations
2
2
 
3
- import re
4
3
  from typing import Union
5
4
 
6
5
  import awkward as ak
@@ -9,17 +8,15 @@ import awkward.index
9
8
  import numpy as np
10
9
 
11
10
  import uproot_custom.cpp
11
+ from uproot_custom.utils import (
12
+ get_map_key_val_typenames,
13
+ get_sequence_element_typename,
14
+ get_top_type_name,
15
+ )
12
16
 
13
17
  registered_readers: set[type["BaseReader"]] = set()
14
18
 
15
19
 
16
- def get_top_type_name(type_name: str) -> str:
17
- if type_name.endswith("*"):
18
- type_name = type_name[:-1].strip()
19
- type_name = type_name.replace("std::", "").strip()
20
- return type_name.split("<")[0]
21
-
22
-
23
20
  def gen_tree_config(
24
21
  cls_streamer_info: dict,
25
22
  all_streamer_info: dict,
@@ -273,18 +270,6 @@ stl_typenames = {
273
270
 
274
271
 
275
272
  class STLSeqReader(BaseReader):
276
- @staticmethod
277
- def get_sequence_element_typename(type_name: str) -> str:
278
- """
279
- Get the element type name of a vector type.
280
-
281
- e.g. vector<vector<int>> -> vector<int>
282
- """
283
- type_name = (
284
- type_name.replace("std::", "").replace("< ", "<").replace(" >", ">").strip()
285
- )
286
- return re.match(r"^(vector|array|list|set|unordered_set)<(.*)>$", type_name).group(2)
287
-
288
273
  @classmethod
289
274
  def gen_tree_config(
290
275
  cls,
@@ -299,7 +284,7 @@ class STLSeqReader(BaseReader):
299
284
 
300
285
  fName = cls_streamer_info["fName"]
301
286
  fTypeName = cls_streamer_info["fTypeName"]
302
- element_type = cls.get_sequence_element_typename(fTypeName)
287
+ element_type = get_sequence_element_typename(fTypeName)
303
288
  element_info = {
304
289
  "fName": fName,
305
290
  "fTypeName": element_type,
@@ -356,18 +341,6 @@ class STLMapReader(BaseReader):
356
341
  This class reads std::map from a binary parser.
357
342
  """
358
343
 
359
- @staticmethod
360
- def get_map_key_val_typenames(type_name: str) -> tuple[str, str]:
361
- """
362
- Get the key and value type names of a map type.
363
-
364
- e.g. map<int, vector<int>> -> (int, vector<int>)
365
- """
366
- type_name = (
367
- type_name.replace("std::", "").replace("< ", "<").replace(" >", ">").strip()
368
- )
369
- return re.match(r"^(map|unordered_map|multimap)<(.*),(.*)>$", type_name).groups()[1:3]
370
-
371
344
  @classmethod
372
345
  def gen_tree_config(
373
346
  cls,
@@ -381,7 +354,7 @@ class STLMapReader(BaseReader):
381
354
  return None
382
355
 
383
356
  fTypeName = cls_streamer_info["fTypeName"]
384
- key_type_name, val_type_name = cls.get_map_key_val_typenames(fTypeName)
357
+ key_type_name, val_type_name = get_map_key_val_typenames(fTypeName)
385
358
 
386
359
  fName = cls_streamer_info["fName"]
387
360
  key_info = {
@@ -679,30 +652,37 @@ class CArrayReader(BaseReader):
679
652
  item_path,
680
653
  called_from_top,
681
654
  ):
682
- if cls_streamer_info.get("fArrayDim", 0) == 0:
655
+ fTypeName = cls_streamer_info.get("fTypeName", "")
656
+ if not fTypeName.endswith("[]") and cls_streamer_info.get("fArrayDim", 0) == 0:
683
657
  return None
684
658
 
685
659
  fName = cls_streamer_info["fName"]
686
- fTypeName = cls_streamer_info["fTypeName"]
687
- fArrayDim = cls_streamer_info["fArrayDim"]
688
- fMaxIndex = cls_streamer_info["fMaxIndex"]
660
+
661
+ if fTypeName.endswith("[]"):
662
+ fArrayDim = -1
663
+ fMaxIndex = -1
664
+ flat_size = -1
665
+ else:
666
+ fArrayDim = cls_streamer_info["fArrayDim"]
667
+ fMaxIndex = cls_streamer_info["fMaxIndex"]
668
+ flat_size = np.prod(fMaxIndex[:fArrayDim])
689
669
 
690
670
  element_streamer_info = cls_streamer_info.copy()
691
671
  element_streamer_info["fArrayDim"] = 0
672
+ while fTypeName.endswith("[]"):
673
+ fTypeName = fTypeName[:-2]
674
+ element_streamer_info["fTypeName"] = fTypeName
692
675
 
693
676
  element_tree_config = gen_tree_config(
694
677
  element_streamer_info,
695
678
  all_streamer_info,
696
679
  )
697
680
 
698
- flat_size = np.prod(fMaxIndex[:fArrayDim])
699
- assert flat_size > 0, f"flatten_size should be greater than 0, but got {flat_size}"
681
+ assert flat_size != 0, f"flatten_size should cannot be 0."
700
682
 
701
683
  # c-type number or TArray
702
- if (
703
- top_type_name in BasicTypeReader.typenames
704
- or top_type_name in TArrayReader.typenames
705
- ):
684
+ top_type_name = get_top_type_name(fTypeName)
685
+ if top_type_name in BasicTypeReader.typenames or fTypeName in TArrayReader.typenames:
706
686
  return {
707
687
  "reader": cls,
708
688
  "name": fName,
@@ -728,13 +708,27 @@ class CArrayReader(BaseReader):
728
708
  # STL
729
709
  elif top_type_name in stl_typenames:
730
710
  element_tree_config["with_header"] = False
711
+
731
712
  is_obj = not called_from_top
732
713
  if cls_streamer_info.get("fType", 0) == 500:
733
714
  is_obj = True
715
+
716
+ # when is a ragged array, vector/map will have a reader
717
+ element_reader = element_tree_config.get("reader", None)
718
+ if (
719
+ flat_size < 0
720
+ and element_reader is not None
721
+ and element_reader != BasicTypeReader
722
+ ):
723
+ is_obj = True
724
+
725
+ is_stdmap = top_type_name in ["map", "unordered_map", "multimap"]
726
+
734
727
  return {
735
728
  "reader": cls,
736
729
  "name": fName,
737
730
  "is_obj": is_obj,
731
+ "is_stdmap": is_stdmap,
738
732
  "flat_size": flat_size,
739
733
  "element_reader": element_tree_config,
740
734
  "fMaxIndex": fMaxIndex,
@@ -755,6 +749,7 @@ class CArrayReader(BaseReader):
755
749
  return uproot_custom.cpp.CArrayReader(
756
750
  tree_config["name"],
757
751
  tree_config["is_obj"],
752
+ tree_config.get("is_stdmap", False),
758
753
  tree_config["flat_size"],
759
754
  element_reader,
760
755
  )
@@ -765,19 +760,33 @@ class CArrayReader(BaseReader):
765
760
  return None
766
761
 
767
762
  element_tree_config = tree_config["element_reader"]
768
- fMaxIndex = tree_config["fMaxIndex"]
769
- fArrayDim = tree_config["fArrayDim"]
770
- shape = [fMaxIndex[i] for i in range(fArrayDim)]
771
-
772
- element_data = reconstruct_array(
773
- raw_data,
774
- element_tree_config,
775
- )
776
-
777
- for s in shape[::-1]:
778
- element_data = awkward.contents.RegularArray(element_data, int(s))
779
-
780
- return element_data
763
+ flat_size = tree_config["flat_size"]
764
+
765
+ if flat_size > 0:
766
+ fMaxIndex = tree_config["fMaxIndex"]
767
+ fArrayDim = tree_config["fArrayDim"]
768
+ shape = [fMaxIndex[i] for i in range(fArrayDim)]
769
+
770
+ element_data = reconstruct_array(
771
+ raw_data,
772
+ element_tree_config,
773
+ )
774
+
775
+ for s in shape[::-1]:
776
+ element_data = awkward.contents.RegularArray(element_data, int(s))
777
+
778
+ return element_data
779
+
780
+ else: # ragged array
781
+ offsets, element_raw_data = raw_data
782
+ element_data = reconstruct_array(
783
+ element_raw_data,
784
+ element_tree_config,
785
+ )
786
+ return ak.contents.ListOffsetArray(
787
+ ak.index.Index64(offsets),
788
+ element_data,
789
+ )
781
790
 
782
791
 
783
792
  class BaseObjectReader(BaseReader):
@@ -0,0 +1,63 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+
5
+ import uproot.behaviors.TBranch
6
+
7
+
8
+ def regularize_object_path(object_path: str) -> str:
9
+ return re.sub(r";[0-9]+", r"", object_path)
10
+
11
+
12
+ _title_has_dims = re.compile(r"^([^\[\]]*)(\[[^\[\]]+\])+")
13
+ _item_dim_pattern = re.compile(r"\[([1-9][0-9]*)\]")
14
+ _item_any_pattern = re.compile(r"\[(.*)\]")
15
+
16
+
17
+ def get_dims_from_branch(
18
+ branch: uproot.behaviors.TBranch.TBranch,
19
+ ) -> tuple[tuple[int, ...], bool]:
20
+ leaf = branch.member("fLeaves")[0]
21
+ title = leaf.member("fTitle")
22
+
23
+ dims, is_jagged = (), False
24
+
25
+ m = _title_has_dims.match(title)
26
+ if m is not None:
27
+ dims = tuple(int(x) for x in re.findall(_item_dim_pattern, title))
28
+ if dims == () and leaf.member("fLen") > 1:
29
+ dims = (leaf.member("fLen"),)
30
+
31
+ if any(
32
+ _item_dim_pattern.match(x) is None for x in re.findall(_item_any_pattern, title)
33
+ ):
34
+ is_jagged = True
35
+
36
+ return dims, is_jagged
37
+
38
+
39
+ def get_top_type_name(type_name: str) -> str:
40
+ if type_name.endswith("*"):
41
+ type_name = type_name[:-1].strip()
42
+ type_name = type_name.replace("std::", "").strip()
43
+ return type_name.split("<")[0]
44
+
45
+
46
+ def get_sequence_element_typename(type_name: str) -> str:
47
+ """
48
+ Get the element type name of a vector type.
49
+
50
+ e.g. vector<vector<int>> -> vector<int>
51
+ """
52
+ type_name = type_name.replace("std::", "").replace("< ", "<").replace(" >", ">").strip()
53
+ return re.match(r"^(vector|array|list|set|unordered_set)<(.*)>$", type_name).group(2)
54
+
55
+
56
+ def get_map_key_val_typenames(type_name: str) -> tuple[str, str]:
57
+ """
58
+ Get the key and value type names of a map type.
59
+
60
+ e.g. map<int, vector<int>> -> (int, vector<int>)
61
+ """
62
+ type_name = type_name.replace("std::", "").replace("< ", "<").replace(" >", ">").strip()
63
+ return re.match(r"^(map|unordered_map|multimap)<(.*),(.*)>$", type_name).groups()[1:3]
@@ -1,32 +0,0 @@
1
- from pathlib import Path
2
-
3
- import uproot
4
- import uproot_custom
5
-
6
-
7
- uproot_custom.AsCustom.target_branches |= {
8
- "/my_tree:my_obj/m_carr_vec_int[3]",
9
- "/my_tree:my_obj/m_int",
10
- "/my_tree:my_obj/m_carr_tstring[3]",
11
- "/my_tree:my_obj/m_carr2d_vec_int[2][3]",
12
- "/my_tree:my_obj/m_carr2d_tstring[2][3]",
13
- "/my_tree:complicated_stl/m_arr_vec_int[5]",
14
- "/my_tree:complicated_stl/m_vec_uset_int",
15
- "/my_tree:complicated_stl/m_vec_list_int",
16
- "/my_tree:complicated_stl/m_list_set_int",
17
- }
18
-
19
-
20
- def test_AsCustom_1():
21
- f = uproot.open(Path(__file__).parent / "test-data-1.root")
22
- tree = f["my_tree"]
23
- arr = tree.arrays()
24
-
25
-
26
- def test_AsCustom_2():
27
- f = uproot.open(Path(__file__).parent / "test-data-2.root")
28
- tree = f["my_tree"]
29
- tree["complicated_stl/m_arr_vec_int[5]"].array()
30
- tree["complicated_stl/m_vec_uset_int"].array()
31
- tree["complicated_stl/m_vec_list_int"].array()
32
- tree["complicated_stl/m_list_set_int"].array()
@@ -1,62 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import awkward
4
- import numpy
5
- import uproot
6
- import uproot.extras
7
- import uproot.interpretation
8
-
9
-
10
- class AsBinary(uproot.interpretation.Interpretation):
11
- """
12
- Return binary data of the ``TBasket``. Pass an instance of this class
13
- to :ref:`uproot.behaviors.TBranch.TBranch.array` like this:
14
-
15
- .. code-block:: python
16
- binary_data = branch.array(interpretation=AsBinary())
17
-
18
- """
19
-
20
- @property
21
- def cache_key(self) -> str:
22
- return id(self)
23
-
24
- def basket_array(
25
- self,
26
- data,
27
- byte_offsets,
28
- basket,
29
- branch,
30
- context,
31
- cursor_offset,
32
- library,
33
- interp_options,
34
- ):
35
- counts = byte_offsets[1:] - byte_offsets[:-1]
36
- return awkward.unflatten(data, counts)
37
-
38
- def final_array(
39
- self,
40
- basket_arrays,
41
- entry_start,
42
- entry_stop,
43
- entry_offsets,
44
- library,
45
- branch,
46
- options,
47
- ):
48
- basket_entry_starts = numpy.array(entry_offsets[:-1])
49
- basket_entry_stops = numpy.array(entry_offsets[1:])
50
-
51
- basket_start_idx = numpy.where(basket_entry_starts <= entry_start)[0].max()
52
- basket_end_idx = numpy.where(basket_entry_stops >= entry_stop)[0].min()
53
-
54
- arr_to_concat = [basket_arrays[i] for i in range(basket_start_idx, basket_end_idx + 1)]
55
-
56
- awkward = uproot.extras.awkward()
57
- tot_array = awkward.concatenate(arr_to_concat)
58
-
59
- relative_entry_start = entry_start - basket_entry_starts[basket_start_idx]
60
- relative_entry_stop = entry_stop - basket_entry_starts[basket_start_idx]
61
-
62
- return tot_array[relative_entry_start:relative_entry_stop]