tskit 1.0.1__cp314-cp314-macosx_10_15_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tskit/tables.py ADDED
@@ -0,0 +1,4858 @@
1
+ #
2
+ # MIT License
3
+ #
4
+ # Copyright (c) 2018-2024 Tskit Developers
5
+ # Copyright (c) 2017 University of Oxford
6
+ #
7
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
8
+ # of this software and associated documentation files (the "Software"), to deal
9
+ # in the Software without restriction, including without limitation the rights
10
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
+ # copies of the Software, and to permit persons to whom the Software is
12
+ # furnished to do so, subject to the following conditions:
13
+ #
14
+ # The above copyright notice and this permission notice shall be included in all
15
+ # copies or substantial portions of the Software.
16
+ #
17
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
+ # SOFTWARE.
24
+ """
25
+ Tree sequence IO via the tables API.
26
+ """
27
+ import collections
28
+ import dataclasses
29
+ import datetime
30
+ import json
31
+ import numbers
32
+ import operator
33
+ import warnings
34
+ from dataclasses import dataclass
35
+
36
+ import numpy as np
37
+
38
+ import _tskit
39
+ import tskit
40
+ import tskit.metadata as metadata
41
+ import tskit.provenance as provenance
42
+ import tskit.util as util
43
+ from tskit import UNKNOWN_TIME
44
+ from tskit.exceptions import ImmutableTableError
45
+
46
# Options shared by all the *TableRow dataclasses below: frozen=True makes
# row objects immutable value types (attribute assignment raises).
dataclass_options = {"frozen": True}
47
+
48
+
49
+ def _ragged_selection_indices(indexed_offsets, lengths64):
50
+ """
51
+ Return absolute indices into a ragged column for the provided row selection.
52
+ """
53
+ total = int(lengths64.sum())
54
+ if total == 0:
55
+ return np.empty(0, dtype=np.int64)
56
+ row_ids = np.repeat(np.arange(lengths64.size, dtype=np.int64), lengths64)
57
+ start_offsets = indexed_offsets.astype(np.int64, copy=False)[row_ids]
58
+ within_row = np.arange(total, dtype=np.int64) - np.repeat(
59
+ np.cumsum(lengths64, dtype=np.int64) - lengths64, lengths64
60
+ )
61
+ return start_offsets + within_row
62
+
63
+
64
@metadata.lazy_decode()
@dataclass(**dataclass_options)
class IndividualTableRow(util.Dataclass):
    """
    A row in an :class:`IndividualTable`.
    """

    # Frozen dataclass: rows are immutable value objects.
    __slots__ = ["flags", "location", "parents", "metadata"]
    flags: int
    """
    See :attr:`Individual.flags`
    """
    location: np.ndarray
    """
    See :attr:`Individual.location`
    """
    parents: np.ndarray
    """
    See :attr:`Individual.parents`
    """
    metadata: bytes | dict | None
    """
    See :attr:`Individual.metadata`
    """

    # We need a custom eq for the numpy arrays: ndarray.__eq__ is elementwise,
    # so the dataclass-generated comparison would not produce a single bool.
    def __eq__(self, other):
        return (
            isinstance(other, IndividualTableRow)
            and self.flags == other.flags
            and np.array_equal(self.location, other.location)
            and np.array_equal(self.parents, other.parents)
            and self.metadata == other.metadata
        )
98
+
99
+
100
@metadata.lazy_decode()
@dataclass(**dataclass_options)
class NodeTableRow(util.Dataclass):
    """
    A row in a :class:`NodeTable`.
    """

    # Frozen dataclass: rows are immutable value objects.
    __slots__ = ["flags", "time", "population", "individual", "metadata"]
    flags: int
    """
    See :attr:`Node.flags`
    """
    time: float
    """
    See :attr:`Node.time`
    """
    population: int
    """
    See :attr:`Node.population`
    """
    individual: int
    """
    See :attr:`Node.individual`
    """
    metadata: bytes | dict | None
    """
    See :attr:`Node.metadata`
    """
128
+
129
+
130
@metadata.lazy_decode()
@dataclass(**dataclass_options)
class EdgeTableRow(util.Dataclass):
    """
    A row in an :class:`EdgeTable`.
    """

    # Frozen dataclass: rows are immutable value objects.
    __slots__ = ["left", "right", "parent", "child", "metadata"]
    left: float
    """
    See :attr:`Edge.left`
    """
    right: float
    """
    See :attr:`Edge.right`
    """
    parent: int
    """
    See :attr:`Edge.parent`
    """
    child: int
    """
    See :attr:`Edge.child`
    """
    metadata: bytes | dict | None
    """
    See :attr:`Edge.metadata`
    """
158
+
159
+
160
@metadata.lazy_decode()
@dataclass(**dataclass_options)
class MigrationTableRow(util.Dataclass):
    """
    A row in a :class:`MigrationTable`.
    """

    # Frozen dataclass: rows are immutable value objects.
    __slots__ = ["left", "right", "node", "source", "dest", "time", "metadata"]
    left: float
    """
    See :attr:`Migration.left`
    """
    right: float
    """
    See :attr:`Migration.right`
    """
    node: int
    """
    See :attr:`Migration.node`
    """
    source: int
    """
    See :attr:`Migration.source`
    """
    dest: int
    """
    See :attr:`Migration.dest`
    """
    time: float
    """
    See :attr:`Migration.time`
    """
    metadata: bytes | dict | None
    """
    See :attr:`Migration.metadata`
    """
196
+
197
+
198
@metadata.lazy_decode()
@dataclass(**dataclass_options)
class SiteTableRow(util.Dataclass):
    """
    A row in a :class:`SiteTable`.
    """

    # Frozen dataclass: rows are immutable value objects.
    __slots__ = ["position", "ancestral_state", "metadata"]
    position: float
    """
    See :attr:`Site.position`
    """
    ancestral_state: str
    """
    See :attr:`Site.ancestral_state`
    """
    metadata: bytes | dict | None
    """
    See :attr:`Site.metadata`
    """
218
+
219
+
220
@metadata.lazy_decode()
@dataclass(**dataclass_options)
class MutationTableRow(util.Dataclass):
    """
    A row in a :class:`MutationTable`.
    """

    # Frozen dataclass: rows are immutable value objects.
    __slots__ = ["site", "node", "derived_state", "parent", "metadata", "time"]
    site: int
    """
    See :attr:`Mutation.site`
    """
    node: int
    """
    See :attr:`Mutation.node`
    """
    derived_state: str
    """
    See :attr:`Mutation.derived_state`
    """
    parent: int
    """
    See :attr:`Mutation.parent`
    """
    metadata: bytes | dict | None
    """
    See :attr:`Mutation.metadata`
    """
    time: float
    """
    See :attr:`Mutation.time`
    """

    # We need a custom eq here as we have unknown times (nans) to check:
    # two rows whose times are both "unknown" must compare equal even though
    # nan != nan under ordinary float comparison.
    def __eq__(self, other):
        return (
            isinstance(other, MutationTableRow)
            and self.site == other.site
            and self.node == other.node
            and self.derived_state == other.derived_state
            and self.parent == other.parent
            and self.metadata == other.metadata
            and (
                self.time == other.time
                or (
                    util.is_unknown_time(self.time) and util.is_unknown_time(other.time)
                )
            )
        )
269
+
270
+
271
@metadata.lazy_decode()
@dataclass(**dataclass_options)
class PopulationTableRow(util.Dataclass):
    """
    A row in a :class:`PopulationTable`.
    """

    # Frozen dataclass: rows are immutable value objects.
    __slots__ = ["metadata"]
    metadata: bytes | dict | None
    """
    See :attr:`Population.metadata`
    """
283
+
284
+
285
@dataclass(**dataclass_options)
class ProvenanceTableRow(util.Dataclass):
    """
    A row in a :class:`ProvenanceTable`.
    """

    # Note: provenance rows have no metadata column, hence no lazy_decode
    # decorator on this class.
    __slots__ = ["timestamp", "record"]
    timestamp: str
    """
    See :attr:`Provenance.timestamp`
    """
    record: str
    """
    See :attr:`Provenance.record`
    """
300
+
301
+
302
@dataclass(**dataclass_options)
class TableCollectionIndexes(util.Dataclass):
    """
    A class encapsulating the indexes of a :class:`TableCollection`
    """

    edge_insertion_order: np.ndarray = None
    edge_removal_order: np.ndarray = None

    def asdict(self):
        # Omit any index that has not been set.
        full = dataclasses.asdict(self)
        return {name: value for name, value in full.items() if value is not None}

    @property
    def nbytes(self) -> int:
        """
        The number of bytes taken by the indexes
        """
        arrays = (self.edge_insertion_order, self.edge_removal_order)
        return sum(a.nbytes for a in arrays if a is not None)
325
+
326
+
327
def keep_with_offset(keep, data, offset):
    """
    Used when filtering _offset columns in tables
    """
    # We need the astype here for 32 bit machines
    row_lengths = np.diff(offset).astype(np.int32)
    # Expand the per-row mask to a per-value mask over the flat data column.
    kept_data = data[np.repeat(keep, row_lengths)]
    # Rebuild the offset column for the surviving rows, starting from 0.
    new_offset = np.concatenate(
        [
            np.array([0], dtype=offset.dtype),
            np.cumsum(row_lengths[keep], dtype=offset.dtype),
        ]
    )
    return kept_data, new_offset
342
+
343
+
344
class BaseTable:
    # Base class for all tables, with only immutable methods,
    # or those that don't use separate low-level table implementations.

    # The list of columns in the table. Must be set by subclasses.
    column_names = []
    # Set to True/False by the mutable/immutable subclasses; None here so
    # direct use of the base class is detectable.
    mutable = None

    def _check_required_args(self, **kwargs):
        # Raise TypeError for any keyword argument whose value is None.
        for k, v in kwargs.items():
            if v is None:
                raise TypeError(f"{k} is required")

    @property
    def nbytes(self) -> int:
        """
        Returns the total number of bytes required to store the data
        in this table. Note that this may not be equal to
        the actual memory footprint.
        """
        # It's not ideal that we run asdict() here to do this as we're
        # currently creating copies of the column arrays, so it would
        # be more efficient to have dedicated low-level methods. However,
        # if we do have read-only views on the underlying memory for the
        # column arrays then this will be a perfectly good way of
        # computing the nbytes values and the overhead minimal.
        d = self.asdict()
        nbytes = 0
        # Some tables don't have a metadata_schema
        metadata_schema = d.pop("metadata_schema", None)
        if metadata_schema is not None:
            nbytes += len(metadata_schema.encode())
        nbytes += sum(col.nbytes for col in d.values())
        return nbytes

    def _equals_internal(
        self, other, ignore_metadata=False, *, ignore_timestamps=False
    ):
        # Shared implementation behind equals()/__eq__.
        if self is other:
            return True

        # Tables of different kinds never compare equal.
        if not isinstance(other, BaseTable) or self.table_name != other.table_name:
            return False

        # Can only use mutable fast path if both tables are mutable.
        # If self is mutable and other is not, swap so that the immutable
        # side's _fast_equals implementation is the one invoked.
        base = self
        if self.mutable and not other.mutable:
            base = other
            other = self
        return base._fast_equals(
            other, ignore_metadata=ignore_metadata, ignore_timestamps=ignore_timestamps
        )

    def equals(self, other, ignore_metadata=False):
        """
        Returns True if `self` and `other` are equal. By default, two tables
        are considered equal if their columns and metadata schemas are
        byte-for-byte identical.

        :param other: Another table instance
        :param bool ignore_metadata: If True exclude metadata and metadata schemas
            from the comparison.
        :return: True if other is equal to this table; False otherwise.
        :rtype: bool
        """
        return self._equals_internal(
            other, ignore_metadata=ignore_metadata, ignore_timestamps=False
        )

    def _assert_equals_internal(
        self, other, *, ignore_metadata=False, ignore_timestamps=False
    ):
        # Cheap checks first; only fall into the row-by-row diff in
        # _assert_equals when the fast comparison reports a difference.
        if self is other:
            return
        if not isinstance(other, BaseTable) or self.table_name != other.table_name:
            raise AssertionError(f"Types differ: self={type(self)} other={type(other)}")

        if not self._equals_internal(
            other, ignore_metadata=ignore_metadata, ignore_timestamps=ignore_timestamps
        ):
            # Tables differ; locate and report the first difference.
            self._assert_equals(
                other,
                ignore_metadata=ignore_metadata,
                ignore_timestamps=ignore_timestamps,
            )

    def assert_equals(self, other, *, ignore_metadata=False):
        """
        Raise an AssertionError for the first found difference between
        this and another table of the same type.

        :param other: Another table instance
        :param bool ignore_metadata: If True exclude metadata and metadata schemas
            from the comparison.
        """
        self._assert_equals_internal(
            other, ignore_metadata=ignore_metadata, ignore_timestamps=False
        )

    def _assert_equals(self, other, *, ignore_metadata=False, ignore_timestamps=False):
        # Produce a descriptive AssertionError for the first difference found.
        # Only called once a difference is known to exist.
        if (
            not ignore_metadata
            and hasattr(self, "metadata_schema")
            and hasattr(other, "metadata_schema")
            and self.metadata_schema != other.metadata_schema
        ):
            raise AssertionError(
                f"{type(self).__name__} metadata schemas differ: "
                f"self={self.metadata_schema} "
                f"other={other.metadata_schema}"
            )

        # Compare row by row; zip stops at the shorter table, so a pure
        # length difference is reported by the num_rows check below.
        for n, (row_self, row_other) in enumerate(zip(self, other)):
            if ignore_metadata:
                row_self = dataclasses.replace(row_self, metadata=None)
                row_other = dataclasses.replace(row_other, metadata=None)
            if ignore_timestamps:
                row_self = dataclasses.replace(row_self, timestamp=None)
                row_other = dataclasses.replace(row_other, timestamp=None)
            if row_self != row_other:
                # Build a per-column diff report for this row.
                self_dict = dataclasses.asdict(self[n])
                other_dict = dataclasses.asdict(other[n])
                diff_string = []
                for col in self_dict.keys():
                    if ignore_timestamps and col == "timestamp":
                        continue
                    if isinstance(self_dict[col], np.ndarray):
                        equal = np.array_equal(self_dict[col], other_dict[col])
                    else:
                        equal = self_dict[col] == other_dict[col]
                    if not equal:
                        diff_string.append(
                            f"self.{col}={self_dict[col]} other.{col}={other_dict[col]}"
                        )
                diff_string = "\n".join(diff_string)
                raise AssertionError(
                    f"{type(self).__name__} row {n} differs:\n{diff_string}"
                )

        if self.num_rows != other.num_rows:
            raise AssertionError(
                f"{type(self).__name__} number of rows differ: self={self.num_rows} "
                f"other={other.num_rows}"
            )

        # We can reach this point if the metadata schemas byte representations
        # differ when the decoded schema is the same
        if (
            not ignore_metadata
            and hasattr(self, "ll_table")
            and hasattr(other, "ll_table")
            and self.ll_table.metadata_schema != other.ll_table.metadata_schema
            and self.metadata_schema == other.metadata_schema
        ):
            return

        raise AssertionError(
            "Tables differ in an undetected way - "
            "this is a bug, please report an issue on github"
        )  # pragma: no cover

    def __eq__(self, other):
        return self.equals(other)

    def __len__(self):
        return self.num_rows

    def asdict(self):
        """
        Returns a dictionary mapping the names of the columns in this table
        to the corresponding numpy arrays.
        """
        ret = {col: getattr(self, col) for col in self.column_names}
        # Not all tables have metadata
        try:
            ret["metadata_schema"] = repr(self.metadata_schema)
        except AttributeError:
            pass
        return ret

    def __str__(self):
        headers, rows = self._text_header_and_rows(
            limit=tskit._print_options["max_lines"]
        )
        return util.unicode_table(rows, header=headers, row_separator=False)

    def _repr_html_(self):
        """
        Called e.g. by jupyter notebooks to render tables
        """
        headers, rows = self._text_header_and_rows(
            limit=tskit._print_options["max_lines"]
        )
        return util.html_table(rows, header=headers)

    def _columns_all_integer(self, *colnames):
        # For displaying floating point values without loads of decimal places
        return all(
            np.all(getattr(self, col) == np.floor(getattr(self, col)))
            for col in colnames
        )

    def _text_header_and_rows(self, limit=None):
        """
        Returns headers and rows for table display.
        """
        # Generate headers: "id" + column names (excluding offset columns)
        display_columns = [
            col for col in self.column_names if not col.endswith("_offset")
        ]
        headers = ("id",) + tuple(display_columns)

        rows = []
        row_indexes = util.truncate_rows(self.num_rows, limit)

        # Decide decimal places per float column: 0 if all values are whole
        # numbers, 8 otherwise.
        float_columns = {}
        for col in display_columns:
            arr = getattr(self, col)
            if np.issubdtype(arr.dtype, np.floating):
                float_columns[col] = 0 if self._columns_all_integer(col) else 8

        for j in row_indexes:
            if j == -1:
                # Sentinel string consumed by the table renderers to show a
                # "rows skipped" marker.
                rows.append(f"__skipped__{self.num_rows - limit}")
            else:
                row = self[j]
                formatted_values = [f"{j:,}"]  # ID column
                for col in display_columns:
                    value = getattr(row, col)
                    if col == "metadata":
                        formatted_values.append(util.render_metadata(value))
                    elif col in ["location", "parents"]:
                        # Array columns - join with commas
                        if col == "parents":
                            formatted_values.append(
                                ", ".join([f"{p:,}" for p in value])
                            )
                        else:
                            formatted_values.append(", ".join(map(str, value)))
                    elif col in float_columns:
                        dp = float_columns[col]
                        formatted_values.append(f"{value:,.{dp}f}")
                    elif isinstance(value, (int, np.integer)):
                        formatted_values.append(f"{value:,}")
                    else:
                        formatted_values.append(str(value))
                rows.append(formatted_values)
        return headers, rows
593
+
594
+
595
def _assert_table_collections_equal(
    tc1,
    tc2,
    *,
    ignore_metadata=False,
    ignore_ts_metadata=False,
    ignore_provenance=False,
    ignore_timestamps=False,
    ignore_reference_sequence=False,
    ignore_tables=False,
):
    """
    Raise AssertionError describing the first difference found between two
    table collections; return normally if no difference is detected.

    The ``ignore_*`` flags exclude the corresponding component from the
    comparison. Returning without raising means the collections are equal
    under the requested comparison.
    """
    # This is shared between TableCollection and ImmutableTableCollection,
    # could go in a base class, but there's not much else in common

    if not (ignore_metadata or ignore_ts_metadata):
        if tc1.metadata_schema != tc2.metadata_schema:
            raise AssertionError(
                f"Metadata schemas differ: self={tc1.metadata_schema} "
                f"other={tc2.metadata_schema}"
            )
        if tc1.metadata != tc2.metadata:
            raise AssertionError(
                f"Metadata differs: self={tc1.metadata} other={tc2.metadata}"
            )

    if not ignore_reference_sequence:
        tc1.reference_sequence.assert_equals(
            tc2.reference_sequence, ignore_metadata=ignore_metadata
        )

    if tc1.time_units != tc2.time_units:
        raise AssertionError(
            f"Time units differs: self={tc1.time_units} other={tc2.time_units}"
        )

    if tc1.sequence_length != tc2.sequence_length:
        raise AssertionError(
            f"Sequence Length differs: self={tc1.sequence_length} "
            f"other={tc2.sequence_length}"
        )

    if not ignore_tables:
        # Provenances are handled separately below so the timestamp flag
        # can be applied to them.
        for table_name, table in tc1.table_name_map.items():
            if table_name == "provenances":
                continue
            other_table = getattr(tc2, table_name)
            # Prefer the immutable side's assert_equals when the pair mixes
            # mutable and immutable tables.
            if isinstance(table, ImmutableBaseTable):
                table.assert_equals(other_table, ignore_metadata=ignore_metadata)
            elif isinstance(other_table, ImmutableBaseTable):
                other_table.assert_equals(table, ignore_metadata=ignore_metadata)
            else:
                table.assert_equals(other_table, ignore_metadata=ignore_metadata)

    if not ignore_provenance and not ignore_tables:
        prov1 = tc1.provenances
        prov2 = tc2.provenances
        # Same mutable/immutable dispatch as for the other tables above.
        if isinstance(prov1, ImmutableProvenanceTable):
            prov1.assert_equals(prov2, ignore_timestamps=ignore_timestamps)
        elif isinstance(prov2, ImmutableProvenanceTable):
            prov2.assert_equals(prov1, ignore_timestamps=ignore_timestamps)
        else:
            prov1.assert_equals(prov2, ignore_timestamps=ignore_timestamps)

    if (
        not ignore_metadata
        and hasattr(tc1, "_ll_object")
        and hasattr(tc2, "_ll_object")
        and hasattr(tc1._ll_object, "metadata_schema")
        and hasattr(tc2._ll_object, "metadata_schema")
        and tc1._ll_object.metadata_schema != tc2._ll_object.metadata_schema
        and tc1.metadata_schema == tc2.metadata_schema
    ):
        # Schemas differ in byte representation but are equivalent when decoded
        return

    # If we reach here, all comparisons matched; treat collections as equal.
    return
672
+
673
+
674
class MutableBaseTable(BaseTable):
    # Abstract base class for mutable tables that use the low-level table implementation.

    mutable = True

    def __init__(self, ll_table, row_class):
        # ll_table: the low-level table object backing this view.
        # row_class: the *TableRow dataclass used for rows returned by
        # __getitem__ and iteration.
        self.ll_table = ll_table
        self.row_class = row_class

    def _fast_equals(self, other, **kwargs):
        # Delegate to the low-level equals, forwarding only the flags that
        # are set to True.
        return self.ll_table.equals(
            other.ll_table, **{k: v for k, v in kwargs.items() if v is True}
        )

    @property
    def num_rows(self) -> int:
        return self.ll_table.num_rows

    @property
    def max_rows(self) -> int:
        return self.ll_table.max_rows

    @property
    def max_rows_increment(self) -> int:
        return self.ll_table.max_rows_increment

    def __getattr__(self, name):
        # Only reached when normal attribute lookup fails; column accesses
        # are forwarded to the low-level table.
        if name in self.column_names:
            return getattr(self.ll_table, name)
        else:
            raise AttributeError(
                f"{self.__class__.__name__} object has no attribute {name}"
            )

    def __setattr__(self, name, value):
        # Assigning to a column round-trips through asdict()/set_columns()
        # so the low-level table validates the full set of columns together.
        if name in self.column_names:
            d = self.asdict()
            d[name] = value
            self.set_columns(**d)
        else:
            object.__setattr__(self, name, value)

    def _make_row(self, *args):
        # Attach a lazy metadata decoder when the table has a schema;
        # tables without metadata fall back to the plain row constructor.
        try:
            return self.row_class(
                *args, metadata_decoder=self.metadata_schema.decode_row
            )
        except AttributeError:
            return self.row_class(*args)

    def __getitem__(self, index):
        """
        If passed an integer, return the specified row of this table, decoding metadata
        if it is present. Supports negative indexing, e.g. ``table[-5]``.
        If passed a slice, iterable or array return a new table containing the specified
        rows. Similar to numpy fancy indexing, if the array or iterables contains
        booleans then the index acts as a mask, returning those rows for which the mask
        is True. Note that as the result is a new table, the row ids will change as tskit
        row ids are row indexes.

        :param index: the index of a desired row, a slice of the desired rows, an
            iterable or array of the desired row numbers, or a boolean array to use as
            a mask.
        """

        if isinstance(index, numbers.Integral):
            # Single row by integer
            if index < 0:
                index += len(self)
            if index < 0 or index >= len(self):
                raise IndexError("Index out of bounds")
            return self._make_row(*self.ll_table.get_row(index))
        elif isinstance(index, numbers.Number):
            # e.g. floats are not valid indexes
            raise TypeError("Index must be integer, slice or iterable")
        elif isinstance(index, slice):
            index = range(*index.indices(len(self)))
        else:
            index = np.asarray(index)
            if index.dtype == np.bool_:
                if len(index) != len(self):
                    raise IndexError("Boolean index must be same length as table")
                # Convert the mask to the row numbers it selects.
                index = np.flatnonzero(index)
        index = util.safe_np_int_cast(index, np.int32)

        ret = self.__class__()
        # Not all tables have metadata schemas; guard access
        try:
            ret.metadata_schema = self.metadata_schema
        except AttributeError:
            pass
        ret.ll_table.extend(self.ll_table, row_indexes=index)

        return ret

    def __setitem__(self, index, new_row):
        """
        Replaces a row of this table at the specified index with information from a
        row-like object. Metadata will be validated and encoded according to the table's
        :attr:`metadata_schema<tskit.IndividualTable.metadata_schema>`.

        :param index: the index of the row to change
        :param row-like new_row: An object that has attributes corresponding to the
            properties of the new row. Both the objects returned from ``table[i]`` and
            e.g. ``ts.individual(i)`` work for this purpose, along with any other
            object with the correct attributes.
        """
        if isinstance(index, numbers.Integral):
            # Single row by integer
            if index < 0:
                index += len(self)
            if index < 0 or index >= len(self):
                raise IndexError("Index out of bounds")
        else:
            raise TypeError("Index must be integer")

        row_data = {
            column: getattr(new_row, column)
            for column in self.column_names
            if "_offset" not in column
        }

        # Encode the metadata - note that if this becomes a perf bottleneck it is
        # possible to use the cached, encoded metadata in the row object, rather than
        # decode and reencode
        if "metadata" in row_data:
            row_data["metadata"] = self.metadata_schema.validate_and_encode_row(
                row_data["metadata"]
            )

        self.ll_table.update_row(row_index=index, **row_data)

    def append(self, row):
        """
        Adds a new row to this table and returns the ID of the new row. Metadata, if
        specified, will be validated and encoded according to the table's
        :attr:`metadata_schema<tskit.IndividualTable.metadata_schema>`.

        :param row-like row: An object that has attributes corresponding to the
            properties of the new row. Both the objects returned from ``table[i]`` and
            e.g. ``ts.individual(i)`` work for this purpose, along with any other
            object with the correct attributes.
        :return: The index of the newly added row.
        :rtype: int
        """
        return self.add_row(
            **{
                column: getattr(row, column)
                for column in self.column_names
                if "_offset" not in column
            }
        )

    def replace_with(self, other):
        # Overwrite the contents of this table with a copy of the other table
        self.set_columns(**other.asdict())

    def clear(self):
        """
        Deletes all rows in this table.
        """
        self.ll_table.clear()

    def reset(self):
        # Deprecated alias for clear
        self.clear()

    def truncate(self, num_rows):
        """
        Truncates this table so that only the first ``num_rows`` are retained.

        :param int num_rows: The number of rows to retain in this table.
        """
        return self.ll_table.truncate(num_rows)

    def keep_rows(self, keep):
        """
        .. include:: substitutions/table_keep_rows_main.rst

        :param array-like keep: The rows to keep as a boolean array. Must
            be the same length as the table, and convertible to a numpy
            array of dtype bool.
        :return: The mapping between old and new row IDs as a numpy
            array (dtype int32).
        :rtype: numpy.ndarray (dtype=np.int32)
        """
        # We do this check here rather than in the C code because calling
        # len() on the input will cause a more readable exception to be
        # raised than the inscrutable errors we get from numpy when
        # converting arguments of the wrong type.
        if len(keep) != len(self):
            msg = (
                "Argument for keep_rows must be a boolean array of "
                "the same length as the table. "
                f"(need:{len(self)}, got:{len(keep)})"
            )
            raise ValueError(msg)
        return self.ll_table.keep_rows(keep)

    # Pickle support
    def __getstate__(self):
        return self.asdict()

    # Unpickle support
    def __setstate__(self, state):
        self.__init__()
        self.set_columns(**state)

    def copy(self):
        """
        Returns a deep copy of this table
        """
        copy = self.__class__()
        copy.set_columns(**self.asdict())
        return copy

    def set_columns(self, **kwargs):
        """
        Sets the values for each column in this :class:`Table` using values
        provided in numpy arrays. Overwrites existing data in all the table columns.
        """
        # Abstract: concrete tables implement this against their low-level table.
        raise NotImplementedError()
895
+
896
+
897
class ImmutableBaseTable(BaseTable):
    # List of all mutation methods that should give a nice error
    _MUTATION_METHODS = {
        "add_row",
        "clear",
        "set_columns",
        "truncate",
        "replace_with",
        "append_columns",
        "keep_rows",
        "append",
        "reset",
        "drop_metadata",
        "packset_metadata",
        "packset_location",
        "packset_parents",
        "packset_ancestral_state",
        "packset_derived_state",
        "packset_record",
        "packset_timestamp",
        "squash",
    }

    mutable = False
    # These are set by subclasses.
    _row_field_indices = None
    table_name = None
    mutable_class = None

    def __init__(self, ll_tree_sequence, row_indices=None, row_slice=None):
        # Read-only view of one table of a low-level tree sequence,
        # optionally restricted to a subset of rows given either as an
        # explicit index array (row_indices) or a contiguous slice
        # (row_slice). At most one of the two is used; row_indices wins.
        object.__setattr__(self, "_initialised", False)
        self._llts = ll_tree_sequence
        # e.g. "individuals" -> "individual"; used to build row-class and
        # low-level accessor names.
        singular_name = self.table_name.rstrip("s")
        self.row_class = globals()[f"{singular_name.capitalize()}TableRow"]
        self._ll_row_getter = f"get_{singular_name}"
        self._set_column_names = set(self.column_names)

        self._row_indices = row_indices
        self._row_slice = row_slice
        if row_indices is None:
            if row_slice is None:
                # Whole-table view.
                self.num_rows = getattr(self._llts, f"get_num_{self.table_name}")()
            else:
                self.num_rows = max(0, row_slice.stop - row_slice.start)
                self._row_slice = row_slice
        else:
            self.num_rows = len(row_indices)
            # row_indices takes precedence over any slice.
            self._row_slice = None
        object.__setattr__(self, "_initialised", True)

    def copy(self):
        """
        Returns a mutable deep copy of this immutable table.

        :return: A deep copy of this table.
        :rtype: The mutable table class corresponding to this table
            (``self.mutable_class``).
        """
        mutable_table = self.mutable_class()
        column_data = self.asdict()
        mutable_table.set_columns(**column_data)
        return mutable_table

    def __len__(self):
        return self.num_rows

    def __iter__(self):
        # Yield row objects for the selected rows, in selection order.
        row_factory = self._create_row_object
        if self._row_indices is not None:
            for ll_index in self._row_indices:
                yield row_factory(ll_index)
            return
        if self._row_slice is None:
            start = 0
            stop = self.num_rows
        else:
            start = self._row_slice.start
            stop = self._row_slice.stop
        for ll_index in range(start, stop):
            yield row_factory(ll_index)

    def _fast_equals(self, other, *, ignore_metadata=False, ignore_timestamps=False):
        # Column-wise comparison; used by BaseTable._equals_internal.
        if self.num_rows != other.num_rows:
            return False
        if (
            not ignore_metadata
            and hasattr(self, "metadata_schema")
            and hasattr(other, "metadata_schema")
            and self.metadata_schema != other.metadata_schema
        ):
            return False
        for column_name in self.column_names:
            if ignore_metadata and column_name.startswith("metadata"):
                continue
            if (
                ignore_timestamps
                and getattr(self, "table_name", None) == "provenances"
                and column_name in ("timestamp", "timestamp_offset")
            ):
                continue
            # equal_nan=True so unknown times (nan) compare equal.
            if not np.array_equal(
                getattr(self, column_name), getattr(other, column_name), equal_nan=True
            ):
                return False
        return True

    def __getattr__(self, name):
        # Handle attribute access. This method is only called when an attribute
        # is not found through normal lookup, so we can lazily calculate column
        # contents.
        if name in self._set_column_names:
            full_array = getattr(self._llts, f"{self.table_name}_{name}")
            # TableCollection methods use the LWT code, which is stuck returning
            # int8 for compatibility see https://github.com/tskit-dev/tskit/issues/3284
            if name == "metadata":
                full_array = full_array.view(np.int8)
            if not (self._row_indices is None and self._row_slice is None):
                is_offset = name.endswith("_offset")
                is_ragged = f"{name}_offset" in self._set_column_names
                if self._row_indices is None:
                    subset_array = self._slice_column(
                        full_array, name, is_offset, is_ragged
                    )
                else:
                    subset_array = self._select_column(
                        full_array, name, is_offset, is_ragged
                    )
            else:
                subset_array = full_array
            # Store the result, so on the next access we don't need to calculate it again
            object.__setattr__(self, name, subset_array)
            return subset_array

        if name in self._MUTATION_METHODS:
            raise ImmutableTableError(
                f"Cannot call {name}() on immutable {self.table_name} table. "
                f"Use TreeSequence.dump_tables() for mutable copy."
            )

        # If it's not a blocked method or column, delegate to parent classes
        # This allows metadata mixins to handle metadata_schema and other attributes
        raise AttributeError(
            f"'{self.__class__.__name__}' object has no attribute '{name}'"
        )

    def _slice_column(self, full_array, name, is_offset, is_ragged):
        # Restrict a full-table column array to the rows of self._row_slice.
        row_slice = self._row_slice
        start = row_slice.start
        stop = row_slice.stop
        if is_offset:
            # Offset columns have num_rows + 1 entries, hence stop + 1.
            # NOTE(review): the returned offsets are not rebased to start at
            # 0 here — presumably consumers rebase them; confirm against the
            # callers of this method.
            return full_array[start : stop + 1]
        elif is_ragged:
            # Ragged data column: take the flat values spanned by the
            # selected rows' offsets.
            offset_array = getattr(self._llts, f"{self.table_name}_{name}_offset")
            return full_array[offset_array[start] : offset_array[stop]]
        else:
            return full_array[row_slice]
1052
+
1053
+ def _select_column(self, full_array, name, is_offset, is_ragged):
1054
+ indices = self._row_indices
1055
+ if is_ragged:
1056
+ ragged, offsets = self._select_column_ragged(full_array, name, indices)
1057
+ # We calculated _offset, so might as well store it so it doesn't
1058
+ # need to be recalculated if accessed
1059
+ object.__setattr__(self, f"{name}_offset", offsets)
1060
+ return ragged
1061
+ elif is_offset:
1062
+ return self._select_column_offset(full_array, indices)
1063
+ else:
1064
+ return full_array[indices]
1065
+
1066
+ def _select_column_offset(self, offset_array, indices):
1067
+ lengths = offset_array[indices + 1] - offset_array[indices]
1068
+ result = np.empty(lengths.size + 1, dtype=offset_array.dtype)
1069
+ result[0] = 0
1070
+ if lengths.size > 0:
1071
+ np.cumsum(lengths, dtype=offset_array.dtype, out=result[1:])
1072
+ return result
1073
+
1074
+ def _select_column_ragged(self, full_array, name, indices):
1075
+ offset_array = getattr(self._llts, f"{self.table_name}_{name}_offset")
1076
+ indexed_offsets = offset_array[indices]
1077
+ lengths64 = (offset_array[indices + 1] - indexed_offsets).astype(
1078
+ np.int64, copy=False
1079
+ )
1080
+ gather_indices = _ragged_selection_indices(indexed_offsets, lengths64)
1081
+ result = full_array[gather_indices]
1082
+ offsets_result = self._select_column_offset(offset_array, indices)
1083
+ return result, offsets_result
1084
+
1085
+ def __getitem__(self, index):
1086
+ try:
1087
+ row_index = operator.index(index)
1088
+ except TypeError:
1089
+ selector = self._resolve_selector(index)
1090
+ if isinstance(selector, slice):
1091
+ return self.__class__(self._llts, row_slice=selector)
1092
+ return self.__class__(self._llts, row_indices=selector)
1093
+
1094
+ if row_index < 0:
1095
+ row_index += self.num_rows
1096
+ if row_index < 0 or row_index >= self.num_rows:
1097
+ raise IndexError("Index out of bounds")
1098
+ ll_index = self._resolve_single_index(row_index)
1099
+ return self._create_row_object(ll_index)
1100
+
1101
+ def _current_ll_indices(self):
1102
+ if self._row_indices is None:
1103
+ if self._row_slice is None:
1104
+ start = 0
1105
+ stop = self.num_rows
1106
+ else:
1107
+ start = self._row_slice.start
1108
+ stop = self._row_slice.stop
1109
+ return np.arange(start, stop, dtype=np.int64)
1110
+ return np.asarray(self._row_indices)
1111
+
1112
+ def _resolve_single_index(self, row_index):
1113
+ if self._row_indices is None:
1114
+ base_start = 0 if self._row_slice is None else self._row_slice.start
1115
+ return int(base_start + row_index)
1116
+ return int(self._row_indices[row_index])
1117
+
1118
+ def _resolve_selector(self, selector):
1119
+ if isinstance(selector, slice):
1120
+ step = selector.step or 1
1121
+ if step == 1 and self._row_indices is None:
1122
+ start, stop, _ = selector.indices(self.num_rows)
1123
+ base_start = 0 if self._row_slice is None else self._row_slice.start
1124
+ return slice(base_start + start, base_start + stop)
1125
+ indices = np.arange(self.num_rows, dtype=np.int64)
1126
+ selector = indices[selector]
1127
+
1128
+ selector = np.asarray(selector)
1129
+ if selector.dtype == np.bool_:
1130
+ if len(selector) != self.num_rows:
1131
+ raise IndexError("Boolean index must be same length as table")
1132
+ selector = np.flatnonzero(selector)
1133
+ else:
1134
+ selector = util.safe_np_int_cast(selector, np.int64)
1135
+
1136
+ ll_indices = self._current_ll_indices()
1137
+ resolved = ll_indices[selector]
1138
+ if resolved.dtype != np.int32:
1139
+ resolved = util.safe_np_int_cast(resolved, np.int32)
1140
+ return resolved
1141
+
1142
+ def _create_row_object(self, ll_index):
1143
+ raw_row = getattr(self._llts, self._ll_row_getter)(int(ll_index))
1144
+ spec = self._row_field_indices
1145
+ if spec is None:
1146
+ values = list(raw_row)
1147
+ else:
1148
+ values = [raw_row[i] for i in spec]
1149
+ try:
1150
+ return self.row_class(
1151
+ *values, metadata_decoder=self.metadata_schema.decode_row
1152
+ )
1153
+ except AttributeError:
1154
+ return self.row_class(*values)
1155
+
1156
+ def __setattr__(self, name, value):
1157
+ # Allow all assignments during initialization
1158
+ if not self._initialised:
1159
+ object.__setattr__(self, name, value)
1160
+ return
1161
+ # Allow internal/private attributes
1162
+ if name.startswith("_"):
1163
+ object.__setattr__(self, name, value)
1164
+ return
1165
+ raise ImmutableTableError(
1166
+ f"Cannot set attribute '{name}' on immutable {self.table_name} table. "
1167
+ f"Use TreeSequence.dump_tables() for mutable copy."
1168
+ )
1169
+
1170
+
1171
class MutableMetadataTable(MutableBaseTable, metadata.TableMetadataWriter):
    """Mutable table base combined with metadata-schema writing support."""

    pass
1173
+
1174
+
1175
class ImmutableMetadataTable(ImmutableBaseTable, metadata.TableMetadataReader):
    @property
    def metadata_schema(self):
        """
        The :class:`tskit.MetadataSchema` for this table.
        Overrides the base implementation to access schema from tree sequence.
        """
        try:
            # Parsed once, then cached on the instance.
            return self._metadata_schema
        except AttributeError:
            pass
        schemas = self._llts.get_table_metadata_schemas()
        # The low-level schemas object uses the singular table name,
        # e.g. "node" for table_name "nodes".
        raw_schema = getattr(schemas, self.table_name.rstrip("s"))
        self._metadata_schema = metadata.parse_metadata_schema(raw_schema)
        return self._metadata_schema
1193
+
1194
+
1195
class IndividualTable(MutableMetadataTable):
    """
    A table defining the individuals in a tree sequence. Note that although
    each Individual has associated nodes, reference to these is not stored in
    the individual table, but rather reference to the individual is stored for
    each node in the :class:`NodeTable`. This is similar to the way in which
    the relationship between sites and mutations is modelled.

    .. include:: substitutions/table_edit_warning.rst

    :ivar flags: The array of flags values.
    :vartype flags: numpy.ndarray, dtype=np.uint32
    :ivar location: The flattened array of floating point location values. See
        :ref:`sec_encoding_ragged_columns` for more details.
    :vartype location: numpy.ndarray, dtype=np.float64
    :ivar location_offset: The array of offsets into the location column. See
        :ref:`sec_encoding_ragged_columns` for more details.
    :vartype location_offset: numpy.ndarray, dtype=np.uint32
    :ivar parents: The flattened array of parent individual ids. See
        :ref:`sec_encoding_ragged_columns` for more details.
    :vartype parents: numpy.ndarray, dtype=np.int32
    :ivar parents_offset: The array of offsets into the parents column. See
        :ref:`sec_encoding_ragged_columns` for more details.
    :vartype parents_offset: numpy.ndarray, dtype=np.uint32
    :ivar metadata: The flattened array of binary metadata values. See
        :ref:`sec_tables_api_binary_columns` for more details.
    :vartype metadata: numpy.ndarray, dtype=np.int8
    :ivar metadata_offset: The array of offsets into the metadata column. See
        :ref:`sec_tables_api_binary_columns` for more details.
    :vartype metadata_offset: numpy.ndarray, dtype=np.uint32
    :ivar metadata_schema: The metadata schema for this table's metadata column
    :vartype metadata_schema: tskit.MetadataSchema
    """

    table_name = "individuals"
    column_names = [
        "flags",
        "location",
        "location_offset",
        "parents",
        "parents_offset",
        "metadata",
        "metadata_offset",
    ]

    def __init__(self, max_rows_increment=0, ll_table=None):
        # Wrap an existing low-level table when given; otherwise create one.
        if ll_table is None:
            ll_table = _tskit.IndividualTable(max_rows_increment=max_rows_increment)
        super().__init__(ll_table, IndividualTableRow)

    def add_row(self, flags=0, location=None, parents=None, metadata=None):
        """
        Adds a new row to this :class:`IndividualTable` and returns the ID of the
        corresponding individual. Metadata, if specified, will be validated and encoded
        according to the table's
        :attr:`metadata_schema<tskit.IndividualTable.metadata_schema>`.

        :param int flags: The bitwise flags for the new node.
        :param array-like location: A list of numeric values or one-dimensional numpy
            array describing the location of this individual. If not specified
            or None, a zero-dimensional location is stored.
        :param array-like parents: A list or array of ids of parent individuals. If not
            specified an empty array is stored.
        :param object metadata: Any object that is valid metadata for the table's schema.
            Defaults to the default metadata value for the table's schema. This is
            typically ``{}``. For no schema, ``None``.
        :return: The ID of the newly added individual.
        :rtype: int
        """
        # None means "use the schema's default", not "no metadata".
        if metadata is None:
            metadata = self.metadata_schema.empty_value
        metadata = self.metadata_schema.validate_and_encode_row(metadata)
        return self.ll_table.add_row(
            flags=flags, location=location, parents=parents, metadata=metadata
        )

    def set_columns(
        self,
        flags=None,
        location=None,
        location_offset=None,
        parents=None,
        parents_offset=None,
        metadata=None,
        metadata_offset=None,
        metadata_schema=None,
    ):
        """
        Sets the values for each column in this :class:`IndividualTable` using the
        values in the specified arrays. Overwrites existing data in all the table
        columns.

        The ``flags`` array is mandatory and defines the number of individuals
        the table will contain.
        The ``location`` and ``location_offset`` parameters must be supplied
        together, and meet the requirements for :ref:`sec_encoding_ragged_columns`.
        The ``parents`` and ``parents_offset`` parameters must be supplied
        together, and meet the requirements for :ref:`sec_encoding_ragged_columns`.
        The ``metadata`` and ``metadata_offset`` parameters must be supplied
        together, and meet the requirements for :ref:`sec_encoding_ragged_columns`.
        See :ref:`sec_tables_api_binary_columns` for more information and
        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare metadata.

        :param flags: The bitwise flags for each individual. Required.
        :type flags: numpy.ndarray, dtype=np.uint32
        :param location: The flattened location array. Must be specified along
            with ``location_offset``. If not specified or None, an empty location
            value is stored for each individual.
        :type location: numpy.ndarray, dtype=np.float64
        :param location_offset: The offsets into the ``location`` array.
        :type location_offset: numpy.ndarray, dtype=np.uint32.
        :param parents: The flattened parents array. Must be specified along
            with ``parents_offset``. If not specified or None, an empty parents array
            is stored for each individual.
        :type parents: numpy.ndarray, dtype=np.int32
        :param parents_offset: The offsets into the ``parents`` array.
        :type parents_offset: numpy.ndarray, dtype=np.uint32.
        :param metadata: The flattened metadata array. Must be specified along
            with ``metadata_offset``. If not specified or None, an empty metadata
            value is stored for each individual.
        :type metadata: numpy.ndarray, dtype=np.int8
        :param metadata_offset: The offsets into the ``metadata`` array.
        :type metadata_offset: numpy.ndarray, dtype=np.uint32.
        :param metadata_schema: The encoded metadata schema. If None (default)
            do not overwrite the exising schema. Note that a schema will need to be
            encoded as a string, e.g. via ``repr(new_metadata_schema)``.
        :type metadata_schema: str

        """
        self._check_required_args(flags=flags)
        self.ll_table.set_columns(
            dict(
                flags=flags,
                location=location,
                location_offset=location_offset,
                parents=parents,
                parents_offset=parents_offset,
                metadata=metadata,
                metadata_offset=metadata_offset,
                metadata_schema=metadata_schema,
            )
        )

    def append_columns(
        self,
        flags=None,
        location=None,
        location_offset=None,
        parents=None,
        parents_offset=None,
        metadata=None,
        metadata_offset=None,
    ):
        """
        Appends the specified arrays to the end of the columns in this
        :class:`IndividualTable`. This allows many new rows to be added at once.

        The ``flags`` array is mandatory and defines the number of
        extra individuals to add to the table.
        The ``parents`` and ``parents_offset`` parameters must be supplied
        together, and meet the requirements for :ref:`sec_encoding_ragged_columns`.
        The ``location`` and ``location_offset`` parameters must be supplied
        together, and meet the requirements for :ref:`sec_encoding_ragged_columns`.
        The ``metadata`` and ``metadata_offset`` parameters must be supplied
        together, and meet the requirements for :ref:`sec_encoding_ragged_columns`.
        See :ref:`sec_tables_api_binary_columns` for more information and
        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare metadata.

        :param flags: The bitwise flags for each individual. Required.
        :type flags: numpy.ndarray, dtype=np.uint32
        :param location: The flattened location array. Must be specified along
            with ``location_offset``. If not specified or None, an empty location
            value is stored for each individual.
        :type location: numpy.ndarray, dtype=np.float64
        :param location_offset: The offsets into the ``location`` array.
        :type location_offset: numpy.ndarray, dtype=np.uint32.
        :param parents: The flattened parents array. Must be specified along
            with ``parents_offset``. If not specified or None, an empty parents array
            is stored for each individual.
        :type parents: numpy.ndarray, dtype=np.int32
        :param parents_offset: The offsets into the ``parents`` array.
        :type parents_offset: numpy.ndarray, dtype=np.uint32.
        :param metadata: The flattened metadata array. Must be specified along
            with ``metadata_offset``. If not specified or None, an empty metadata
            value is stored for each individual.
        :type metadata: numpy.ndarray, dtype=np.int8
        :param metadata_offset: The offsets into the ``metadata`` array.
        :type metadata_offset: numpy.ndarray, dtype=np.uint32.
        """
        self._check_required_args(flags=flags)
        self.ll_table.append_columns(
            dict(
                flags=flags,
                location=location,
                location_offset=location_offset,
                parents=parents,
                parents_offset=parents_offset,
                metadata=metadata,
                metadata_offset=metadata_offset,
            )
        )

    def packset_location(self, locations):
        """
        Packs the specified list of location values and updates the ``location``
        and ``location_offset`` columns. The length of the locations array
        must be equal to the number of rows in the table.

        :param list locations: A list of locations interpreted as numpy float64
            arrays.
        """
        packed, offset = util.pack_arrays(locations)
        d = self.asdict()
        d["location"] = packed
        d["location_offset"] = offset
        self.set_columns(**d)

    def packset_parents(self, parents):
        """
        Packs the specified list of parent values and updates the ``parents``
        and ``parents_offset`` columns. The length of the parents array
        must be equal to the number of rows in the table.

        :param list parents: A list of list of parent ids, interpreted as numpy int32
            arrays.
        """
        packed, offset = util.pack_arrays(parents, np.int32)
        d = self.asdict()
        d["parents"] = packed
        d["parents_offset"] = offset
        self.set_columns(**d)

    def keep_rows(self, keep):
        """
        .. include:: substitutions/table_keep_rows_main.rst

        The values in the ``parents`` column are updated according to this
        map, so that reference integrity within the table is maintained.
        As a consequence of this, the values in the ``parents`` column
        for kept rows are bounds-checked and an error raised if they
        are not valid. Rows that are deleted are not checked for
        parent ID integrity.

        If an attempt is made to delete rows that are referred to by
        the ``parents`` column of rows that are retained, an error
        is raised.

        These error conditions are checked before any alterations to
        the table are made.

        :param array-like keep: The rows to keep as a boolean array. Must
            be the same length as the table, and convertible to a numpy
            array of dtype bool.
        :return: The mapping between old and new row IDs as a numpy
            array (dtype int32).
        :rtype: numpy.ndarray (dtype=np.int32)
        """
        return super().keep_rows(keep)
1453
+
1454
class NodeTable(MutableMetadataTable):
    """
    A table defining the nodes in a tree sequence. See the
    :ref:`definitions <sec_node_table_definition>` for details on the columns
    in this table and the
    :ref:`tree sequence requirements <sec_valid_tree_sequence_requirements>` section
    for the properties needed for a node table to be a part of a valid tree sequence.

    .. include:: substitutions/table_edit_warning.rst

    :ivar time: The array of time values.
    :vartype time: numpy.ndarray, dtype=np.float64
    :ivar flags: The array of flags values.
    :vartype flags: numpy.ndarray, dtype=np.uint32
    :ivar population: The array of population IDs.
    :vartype population: numpy.ndarray, dtype=np.int32
    :ivar individual: The array of individual IDs that each node belongs to.
    :vartype individual: numpy.ndarray, dtype=np.int32
    :ivar metadata: The flattened array of binary metadata values. See
        :ref:`sec_tables_api_binary_columns` for more details.
    :vartype metadata: numpy.ndarray, dtype=np.int8
    :ivar metadata_offset: The array of offsets into the metadata column. See
        :ref:`sec_tables_api_binary_columns` for more details.
    :vartype metadata_offset: numpy.ndarray, dtype=np.uint32
    :ivar metadata_schema: The metadata schema for this table's metadata column
    :vartype metadata_schema: tskit.MetadataSchema
    """

    table_name = "nodes"
    column_names = [
        "time",
        "flags",
        "population",
        "individual",
        "metadata",
        "metadata_offset",
    ]

    def __init__(self, max_rows_increment=0, ll_table=None):
        # Wrap the supplied low-level table, or create a fresh one.
        if ll_table is None:
            ll_table = _tskit.NodeTable(max_rows_increment=max_rows_increment)
        super().__init__(ll_table, NodeTableRow)

    def add_row(self, flags=0, time=0, population=-1, individual=-1, metadata=None):
        """
        Add a row to this :class:`NodeTable` and return the ID of the new
        node. Metadata, if given, is validated and encoded according to the
        table's :attr:`metadata_schema<tskit.NodeTable.metadata_schema>`.

        :param int flags: The bitwise flags for the new node.
        :param float time: The birth time for the new node.
        :param int population: The ID of the population in which the new node
            was born. Defaults to :data:`tskit.NULL`.
        :param int individual: The ID of the individual in which the new node
            was born. Defaults to :data:`tskit.NULL`.
        :param object metadata: Any object that is valid metadata for the
            table's schema. Defaults to the schema's default metadata value
            (typically ``{}``; ``None`` when there is no schema).
        :return: The ID of the newly added node.
        :rtype: int
        """
        schema = self.metadata_schema
        # None selects the schema's default value rather than "no metadata".
        if metadata is None:
            metadata = schema.empty_value
        encoded = schema.validate_and_encode_row(metadata)
        return self.ll_table.add_row(flags, time, population, individual, encoded)

    def set_columns(
        self,
        flags=None,
        time=None,
        population=None,
        individual=None,
        metadata=None,
        metadata_offset=None,
        metadata_schema=None,
    ):
        """
        Replace all column data in this :class:`NodeTable` with the values in
        the specified arrays, overwriting any existing data.

        The ``flags``, ``time`` and ``population`` arrays must all be of the
        same length, which is equal to the number of nodes the table will
        contain. The ``metadata`` and ``metadata_offset`` parameters must be
        supplied together, and meet the requirements for
        :ref:`sec_encoding_ragged_columns`.
        See :ref:`sec_tables_api_binary_columns` for more information and
        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare
        metadata.

        :param flags: The bitwise flags for each node. Required.
        :type flags: numpy.ndarray, dtype=np.uint32
        :param time: The time values for each node. Required.
        :type time: numpy.ndarray, dtype=np.float64
        :param population: The population values for each node. If not
            specified or None, the :data:`tskit.NULL` value is stored for
            each node.
        :type population: numpy.ndarray, dtype=np.int32
        :param individual: The individual values for each node. If not
            specified or None, the :data:`tskit.NULL` value is stored for
            each node.
        :type individual: numpy.ndarray, dtype=np.int32
        :param metadata: The flattened metadata array. Must be specified
            along with ``metadata_offset``. If not specified or None, an
            empty metadata value is stored for each node.
        :type metadata: numpy.ndarray, dtype=np.int8
        :param metadata_offset: The offsets into the ``metadata`` array.
        :type metadata_offset: numpy.ndarray, dtype=np.uint32.
        :param metadata_schema: The encoded metadata schema. If None
            (default) do not overwrite the existing schema. Note that a
            schema will need to be encoded as a string, e.g. via
            ``repr(new_metadata_schema)``.
        :type metadata_schema: str
        """
        self._check_required_args(flags=flags, time=time)
        self.ll_table.set_columns(
            {
                "flags": flags,
                "time": time,
                "population": population,
                "individual": individual,
                "metadata": metadata,
                "metadata_offset": metadata_offset,
                "metadata_schema": metadata_schema,
            }
        )

    def append_columns(
        self,
        flags=None,
        time=None,
        population=None,
        individual=None,
        metadata=None,
        metadata_offset=None,
    ):
        """
        Append the specified arrays to the end of the columns in this
        :class:`NodeTable`, adding many new rows at once.

        The ``flags``, ``time`` and ``population`` arrays must all be of the
        same length, which is equal to the number of nodes that will be added
        to the table. The ``metadata`` and ``metadata_offset`` parameters
        must be supplied together, and meet the requirements for
        :ref:`sec_encoding_ragged_columns`.
        See :ref:`sec_tables_api_binary_columns` for more information and
        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare
        metadata.

        :param flags: The bitwise flags for each node. Required.
        :type flags: numpy.ndarray, dtype=np.uint32
        :param time: The time values for each node. Required.
        :type time: numpy.ndarray, dtype=np.float64
        :param population: The population values for each node. If not
            specified or None, the :data:`tskit.NULL` value is stored for
            each node.
        :type population: numpy.ndarray, dtype=np.int32
        :param individual: The individual values for each node. If not
            specified or None, the :data:`tskit.NULL` value is stored for
            each node.
        :type individual: numpy.ndarray, dtype=np.int32
        :param metadata: The flattened metadata array. Must be specified
            along with ``metadata_offset``. If not specified or None, an
            empty metadata value is stored for each node.
        :type metadata: numpy.ndarray, dtype=np.int8
        :param metadata_offset: The offsets into the ``metadata`` array.
        :type metadata_offset: numpy.ndarray, dtype=np.uint32.
        """
        self._check_required_args(flags=flags, time=time)
        self.ll_table.append_columns(
            {
                "flags": flags,
                "time": time,
                "population": population,
                "individual": individual,
                "metadata": metadata,
                "metadata_offset": metadata_offset,
            }
        )
1624
+
1625
+
1626
class EdgeTable(MutableMetadataTable):
    """
    A table defining the edges in a tree sequence. See the
    :ref:`definitions <sec_edge_table_definition>` for details on the columns
    in this table and the
    :ref:`tree sequence requirements <sec_valid_tree_sequence_requirements>` section
    for the properties needed for an edge table to be a part of a valid tree sequence.

    .. include:: substitutions/table_edit_warning.rst

    :ivar left: The array of left coordinates.
    :vartype left: numpy.ndarray, dtype=np.float64
    :ivar right: The array of right coordinates.
    :vartype right: numpy.ndarray, dtype=np.float64
    :ivar parent: The array of parent node IDs.
    :vartype parent: numpy.ndarray, dtype=np.int32
    :ivar child: The array of child node IDs.
    :vartype child: numpy.ndarray, dtype=np.int32
    :ivar metadata: The flattened array of binary metadata values. See
        :ref:`sec_tables_api_binary_columns` for more details.
    :vartype metadata: numpy.ndarray, dtype=np.int8
    :ivar metadata_offset: The array of offsets into the metadata column. See
        :ref:`sec_tables_api_binary_columns` for more details.
    :vartype metadata_offset: numpy.ndarray, dtype=np.uint32
    :ivar metadata_schema: The metadata schema for this table's metadata column
    :vartype metadata_schema: tskit.MetadataSchema
    """

    # Plural name used to build low-level attribute names (e.g. "edges_left").
    table_name = "edges"
    column_names = [
        "left",
        "right",
        "parent",
        "child",
        "metadata",
        "metadata_offset",
    ]
1663
+
1664
    def __init__(self, max_rows_increment=0, ll_table=None):
        # Wrap an existing low-level table if given, otherwise create one.
        if ll_table is None:
            ll_table = _tskit.EdgeTable(max_rows_increment=max_rows_increment)
        super().__init__(ll_table, EdgeTableRow)
1668
+
1669
+ def add_row(self, left, right, parent, child, metadata=None):
1670
+ """
1671
+ Adds a new row to this :class:`EdgeTable` and returns the ID of the
1672
+ corresponding edge. Metadata, if specified, will be validated and encoded
1673
+ according to the table's
1674
+ :attr:`metadata_schema<tskit.EdgeTable.metadata_schema>`.
1675
+
1676
+ :param float left: The left coordinate (inclusive).
1677
+ :param float right: The right coordinate (exclusive).
1678
+ :param int parent: The ID of parent node.
1679
+ :param int child: The ID of child node.
1680
+ :param object metadata: Any object that is valid metadata for the table's schema.
1681
+ Defaults to the default metadata value for the table's schema. This is
1682
+ typically ``{}``. For no schema, ``None``.
1683
+ :return: The ID of the newly added edge.
1684
+ :rtype: int
1685
+ """
1686
+ if metadata is None:
1687
+ metadata = self.metadata_schema.empty_value
1688
+ metadata = self.metadata_schema.validate_and_encode_row(metadata)
1689
+ return self.ll_table.add_row(left, right, parent, child, metadata)
1690
+
1691
+ def set_columns(
1692
+ self,
1693
+ left=None,
1694
+ right=None,
1695
+ parent=None,
1696
+ child=None,
1697
+ metadata=None,
1698
+ metadata_offset=None,
1699
+ metadata_schema=None,
1700
+ ):
1701
+ """
1702
+ Sets the values for each column in this :class:`EdgeTable` using the values
1703
+ in the specified arrays. Overwrites existing data in all the table columns.
1704
+
1705
+ The ``left``, ``right``, ``parent`` and ``child`` parameters are mandatory,
1706
+ and must be numpy arrays of the same length (which is equal to the number of
1707
+ edges the table will contain).
1708
+ The ``metadata`` and ``metadata_offset`` parameters must be supplied together,
1709
+ and meet the requirements for :ref:`sec_encoding_ragged_columns`.
1710
+ See :ref:`sec_tables_api_binary_columns` for more information and
1711
+ :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare metadata.
1712
+
1713
+
1714
+ :param left: The left coordinates (inclusive).
1715
+ :type left: numpy.ndarray, dtype=np.float64
1716
+ :param right: The right coordinates (exclusive).
1717
+ :type right: numpy.ndarray, dtype=np.float64
1718
+ :param parent: The parent node IDs.
1719
+ :type parent: numpy.ndarray, dtype=np.int32
1720
+ :param child: The child node IDs.
1721
+ :type child: numpy.ndarray, dtype=np.int32
1722
+ :param metadata: The flattened metadata array. Must be specified along
1723
+ with ``metadata_offset``. If not specified or None, an empty metadata
1724
+ value is stored for each node.
1725
+ :type metadata: numpy.ndarray, dtype=np.int8
1726
+ :param metadata_offset: The offsets into the ``metadata`` array.
1727
+ :type metadata_offset: numpy.ndarray, dtype=np.uint32.
1728
+ :param metadata_schema: The encoded metadata schema. If None (default)
1729
+ do not overwrite the exising schema. Note that a schema will need to be
1730
+ encoded as a string, e.g. via ``repr(new_metadata_schema)``.
1731
+ :type metadata_schema: str
1732
+ """
1733
+ self._check_required_args(left=left, right=right, parent=parent, child=child)
1734
+ self.ll_table.set_columns(
1735
+ dict(
1736
+ left=left,
1737
+ right=right,
1738
+ parent=parent,
1739
+ child=child,
1740
+ metadata=metadata,
1741
+ metadata_offset=metadata_offset,
1742
+ metadata_schema=metadata_schema,
1743
+ )
1744
+ )
1745
+
1746
+ def append_columns(
1747
+ self, left, right, parent, child, metadata=None, metadata_offset=None
1748
+ ):
1749
+ """
1750
+ Appends the specified arrays to the end of the columns of this
1751
+ :class:`EdgeTable`. This allows many new rows to be added at once.
1752
+
1753
+ The ``left``, ``right``, ``parent`` and ``child`` parameters are mandatory,
1754
+ and must be numpy arrays of the same length (which is equal to the number of
1755
+ additional edges to add to the table). The ``metadata`` and
1756
+ ``metadata_offset`` parameters must be supplied together, and
1757
+ meet the requirements for :ref:`sec_encoding_ragged_columns`.
1758
+ See :ref:`sec_tables_api_binary_columns` for more information and
1759
+ :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare metadata.
1760
+
1761
+
1762
+ :param left: The left coordinates (inclusive).
1763
+ :type left: numpy.ndarray, dtype=np.float64
1764
+ :param right: The right coordinates (exclusive).
1765
+ :type right: numpy.ndarray, dtype=np.float64
1766
+ :param parent: The parent node IDs.
1767
+ :type parent: numpy.ndarray, dtype=np.int32
1768
+ :param child: The child node IDs.
1769
+ :type child: numpy.ndarray, dtype=np.int32
1770
+ :param metadata: The flattened metadata array. Must be specified along
1771
+ with ``metadata_offset``. If not specified or None, an empty metadata
1772
+ value is stored for each node.
1773
+ :type metadata: numpy.ndarray, dtype=np.int8
1774
+ :param metadata_offset: The offsets into the ``metadata`` array.
1775
+ :type metadata_offset: numpy.ndarray, dtype=np.uint32.
1776
+ """
1777
+ self.ll_table.append_columns(
1778
+ dict(
1779
+ left=left,
1780
+ right=right,
1781
+ parent=parent,
1782
+ child=child,
1783
+ metadata=metadata,
1784
+ metadata_offset=metadata_offset,
1785
+ )
1786
+ )
1787
+
1788
    def squash(self):
        """
        Sorts, then condenses the table into the smallest possible number of rows by
        combining any adjacent edges.
        A pair of edges is said to be `adjacent` if they have the same parent and child
        nodes, and if the left coordinate of one of the edges is equal to the right
        coordinate of the other edge.
        The ``squash`` method modifies an :class:`EdgeTable` in place so that any set of
        adjacent edges is replaced by a single edge.
        The new edge will have the same parent and child node, a left coordinate
        equal to the smallest left coordinate in the set, and a right coordinate
        equal to the largest right coordinate in the set.
        The new edge table will be sorted in the order (P, C, L, R): if the node table
        is ordered by increasing node time, as is common, this order will meet the
        :ref:`sec_edge_requirements` for a valid tree sequence, otherwise you will need
        to call :meth:`.sort` on the entire :class:`TableCollection`.

        .. note::
            Note that this method will fail if any edges have non-empty metadata.

        """
        # The sort-and-merge is implemented entirely in the C library.
        self.ll_table.squash()
1810
+
1811
+
1812
class MigrationTable(MutableMetadataTable):
    """
    A table defining the migrations in a tree sequence. See the
    :ref:`definitions <sec_migration_table_definition>` for details on the
    columns in this table and the
    :ref:`tree sequence requirements <sec_valid_tree_sequence_requirements>`
    section for the properties needed for a migration table to be a part of a
    valid tree sequence.

    .. include:: substitutions/table_edit_warning.rst

    :ivar left: The array of left coordinates.
    :vartype left: numpy.ndarray, dtype=np.float64
    :ivar right: The array of right coordinates.
    :vartype right: numpy.ndarray, dtype=np.float64
    :ivar node: The array of node IDs.
    :vartype node: numpy.ndarray, dtype=np.int32
    :ivar source: The array of source population IDs.
    :vartype source: numpy.ndarray, dtype=np.int32
    :ivar dest: The array of destination population IDs.
    :vartype dest: numpy.ndarray, dtype=np.int32
    :ivar time: The array of time values.
    :vartype time: numpy.ndarray, dtype=np.float64
    :ivar metadata: The flattened array of binary metadata values. See
        :ref:`sec_tables_api_binary_columns` for more details.
    :vartype metadata: numpy.ndarray, dtype=np.int8
    :ivar metadata_offset: The array of offsets into the metadata column. See
        :ref:`sec_tables_api_binary_columns` for more details.
    :vartype metadata_offset: numpy.ndarray, dtype=np.uint32
    :ivar metadata_schema: The metadata schema for this table's metadata column
    :vartype metadata_schema: tskit.MetadataSchema
    """

    table_name = "migrations"
    column_names = [
        "left",
        "right",
        "node",
        "source",
        "dest",
        "time",
        "metadata",
        "metadata_offset",
    ]

    def __init__(self, max_rows_increment=0, ll_table=None):
        # Create a fresh low-level table unless the caller supplies one.
        if ll_table is None:
            ll_table = _tskit.MigrationTable(max_rows_increment=max_rows_increment)
        super().__init__(ll_table, MigrationTableRow)

    def add_row(self, left, right, node, source, dest, time, metadata=None):
        """
        Adds a new row to this :class:`MigrationTable` and returns the ID of
        the corresponding migration. Metadata, if specified, will be validated
        and encoded according to the table's
        :attr:`metadata_schema<tskit.MigrationTable.metadata_schema>`.

        :param float left: The left coordinate (inclusive).
        :param float right: The right coordinate (exclusive).
        :param int node: The node ID.
        :param int source: The ID of the source population.
        :param int dest: The ID of the destination population.
        :param float time: The time of the migration event.
        :param object metadata: Any object that is valid metadata for the
            table's schema. Defaults to the default metadata value for the
            table's schema. This is typically ``{}``. For no schema, ``None``.
        :return: The ID of the newly added migration.
        :rtype: int
        """
        schema = self.metadata_schema
        # Substitute the schema's empty value when no metadata is given, then
        # validate/encode in a single step.
        encoded = schema.validate_and_encode_row(
            schema.empty_value if metadata is None else metadata
        )
        return self.ll_table.add_row(left, right, node, source, dest, time, encoded)

    def set_columns(
        self,
        left=None,
        right=None,
        node=None,
        source=None,
        dest=None,
        time=None,
        metadata=None,
        metadata_offset=None,
        metadata_schema=None,
    ):
        """
        Sets the values for each column in this :class:`MigrationTable` using
        the values in the specified arrays. Overwrites existing data in all
        the table columns.

        All parameters except ``metadata``, ``metadata_offset`` and
        ``metadata_schema`` are mandatory, and must be numpy arrays of the
        same length (which is equal to the number of migrations the table
        will contain).
        The ``metadata`` and ``metadata_offset`` parameters must be supplied
        together, and meet the requirements for
        :ref:`sec_encoding_ragged_columns`.
        See :ref:`sec_tables_api_binary_columns` for more information and
        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare
        metadata.

        :param left: The left coordinates (inclusive).
        :type left: numpy.ndarray, dtype=np.float64
        :param right: The right coordinates (exclusive).
        :type right: numpy.ndarray, dtype=np.float64
        :param node: The node IDs.
        :type node: numpy.ndarray, dtype=np.int32
        :param source: The source population IDs.
        :type source: numpy.ndarray, dtype=np.int32
        :param dest: The destination population IDs.
        :type dest: numpy.ndarray, dtype=np.int32
        :param time: The time of each migration.
        :type time: numpy.ndarray, dtype=np.float64
        :param metadata: The flattened metadata array. Must be specified along
            with ``metadata_offset``. If not specified or None, an empty
            metadata value is stored for each migration.
        :type metadata: numpy.ndarray, dtype=np.int8
        :param metadata_offset: The offsets into the ``metadata`` array.
        :type metadata_offset: numpy.ndarray, dtype=np.uint32.
        :param metadata_schema: The encoded metadata schema. If None (default)
            do not overwrite the existing schema. Note that a schema will need
            to be encoded as a string, e.g. via ``repr(new_metadata_schema)``.
        :type metadata_schema: str
        """
        # The six data columns are required; _check_required_args raises a
        # TypeError naming any that are missing.
        required = dict(
            left=left, right=right, node=node, source=source, dest=dest, time=time
        )
        self._check_required_args(**required)
        self.ll_table.set_columns(
            {
                **required,
                "metadata": metadata,
                "metadata_offset": metadata_offset,
                "metadata_schema": metadata_schema,
            }
        )

    def append_columns(
        self,
        left,
        right,
        node,
        source,
        dest,
        time,
        metadata=None,
        metadata_offset=None,
    ):
        """
        Appends the specified arrays to the end of the columns of this
        :class:`MigrationTable`. This allows many new rows to be added at
        once.

        All parameters except ``metadata`` and ``metadata_offset`` are
        mandatory, and must be numpy arrays of the same length (which is
        equal to the number of additional migrations to add to the table).
        The ``metadata`` and ``metadata_offset`` parameters must be supplied
        together, and meet the requirements for
        :ref:`sec_encoding_ragged_columns`.
        See :ref:`sec_tables_api_binary_columns` for more information and
        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare
        metadata.

        :param left: The left coordinates (inclusive).
        :type left: numpy.ndarray, dtype=np.float64
        :param right: The right coordinates (exclusive).
        :type right: numpy.ndarray, dtype=np.float64
        :param node: The node IDs.
        :type node: numpy.ndarray, dtype=np.int32
        :param source: The source population IDs.
        :type source: numpy.ndarray, dtype=np.int32
        :param dest: The destination population IDs.
        :type dest: numpy.ndarray, dtype=np.int32
        :param time: The time of each migration.
        :type time: numpy.ndarray, dtype=np.float64
        :param metadata: The flattened metadata array. Must be specified along
            with ``metadata_offset``. If not specified or None, an empty
            metadata value is stored for each migration.
        :type metadata: numpy.ndarray, dtype=np.int8
        :param metadata_offset: The offsets into the ``metadata`` array.
        :type metadata_offset: numpy.ndarray, dtype=np.uint32.
        """
        self.ll_table.append_columns(
            {
                "left": left,
                "right": right,
                "node": node,
                "source": source,
                "dest": dest,
                "time": time,
                "metadata": metadata,
                "metadata_offset": metadata_offset,
            }
        )
2004
+
2005
+
2006
class SiteTable(MutableMetadataTable):
    """
    A table defining the sites in a tree sequence. See the
    :ref:`definitions <sec_site_table_definition>` for details on the columns
    in this table and the
    :ref:`tree sequence requirements <sec_valid_tree_sequence_requirements>`
    section for the properties needed for a site table to be a part of a
    valid tree sequence.

    .. include:: substitutions/table_edit_warning.rst

    :ivar position: The array of site position coordinates.
    :vartype position: numpy.ndarray, dtype=np.float64
    :ivar ancestral_state: The flattened array of ancestral state strings.
        See :ref:`sec_tables_api_text_columns` for more details.
    :vartype ancestral_state: numpy.ndarray, dtype=np.int8
    :ivar ancestral_state_offset: The offsets of rows in the ancestral_state
        array. See :ref:`sec_tables_api_text_columns` for more details.
    :vartype ancestral_state_offset: numpy.ndarray, dtype=np.uint32
    :ivar metadata: The flattened array of binary metadata values. See
        :ref:`sec_tables_api_binary_columns` for more details.
    :vartype metadata: numpy.ndarray, dtype=np.int8
    :ivar metadata_offset: The array of offsets into the metadata column. See
        :ref:`sec_tables_api_binary_columns` for more details.
    :vartype metadata_offset: numpy.ndarray, dtype=np.uint32
    :ivar metadata_schema: The metadata schema for this table's metadata column
    :vartype metadata_schema: tskit.MetadataSchema
    """

    table_name = "sites"
    column_names = [
        "position",
        "ancestral_state",
        "ancestral_state_offset",
        "metadata",
        "metadata_offset",
    ]

    def __init__(self, max_rows_increment=0, ll_table=None):
        # Create a fresh low-level table unless the caller supplies one.
        if ll_table is None:
            ll_table = _tskit.SiteTable(max_rows_increment=max_rows_increment)
        super().__init__(ll_table, SiteTableRow)

    def add_row(self, position, ancestral_state, metadata=None):
        """
        Adds a new row to this :class:`SiteTable` and returns the ID of the
        corresponding site. Metadata, if specified, will be validated and
        encoded according to the table's
        :attr:`metadata_schema<tskit.SiteTable.metadata_schema>`.

        :param float position: The position of this site in genome coordinates.
        :param str ancestral_state: The state of this site at the root of the
            tree.
        :param object metadata: Any object that is valid metadata for the
            table's schema. Defaults to the default metadata value for the
            table's schema. This is typically ``{}``. For no schema, ``None``.
        :return: The ID of the newly added site.
        :rtype: int
        """
        schema = self.metadata_schema
        # Substitute the schema's empty value when no metadata is given, then
        # validate/encode in a single step.
        encoded = schema.validate_and_encode_row(
            schema.empty_value if metadata is None else metadata
        )
        return self.ll_table.add_row(position, ancestral_state, encoded)

    def set_columns(
        self,
        position=None,
        ancestral_state=None,
        ancestral_state_offset=None,
        metadata=None,
        metadata_offset=None,
        metadata_schema=None,
    ):
        """
        Sets the values for each column in this :class:`SiteTable` using the
        values in the specified arrays. Overwrites existing data in all the
        table columns.

        The ``position``, ``ancestral_state`` and ``ancestral_state_offset``
        parameters are mandatory, and must be 1D numpy arrays. The length of
        the ``position`` array determines the number of rows in the table.
        The ``ancestral_state`` and ``ancestral_state_offset`` parameters
        must be supplied together, and meet the requirements for
        :ref:`sec_encoding_ragged_columns` (see
        :ref:`sec_tables_api_text_columns` for more information). The
        ``metadata`` and ``metadata_offset`` parameters must be supplied
        together, and meet the requirements for
        :ref:`sec_encoding_ragged_columns` (see
        :ref:`sec_tables_api_binary_columns` for more information) and
        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare
        metadata.

        :param position: The position of each site in genome coordinates.
        :type position: numpy.ndarray, dtype=np.float64
        :param ancestral_state: The flattened ancestral_state array. Required.
        :type ancestral_state: numpy.ndarray, dtype=np.int8
        :param ancestral_state_offset: The offsets into the
            ``ancestral_state`` array.
        :type ancestral_state_offset: numpy.ndarray, dtype=np.uint32.
        :param metadata: The flattened metadata array. Must be specified along
            with ``metadata_offset``. If not specified or None, an empty
            metadata value is stored for each site.
        :type metadata: numpy.ndarray, dtype=np.int8
        :param metadata_offset: The offsets into the ``metadata`` array.
        :type metadata_offset: numpy.ndarray, dtype=np.uint32.
        :param metadata_schema: The encoded metadata schema. If None (default)
            do not overwrite the existing schema. Note that a schema will need
            to be encoded as a string, e.g. via ``repr(new_metadata_schema)``.
        :type metadata_schema: str
        """
        # Raise a TypeError naming any mandatory column left as None.
        self._check_required_args(
            position=position,
            ancestral_state=ancestral_state,
            ancestral_state_offset=ancestral_state_offset,
        )
        self.ll_table.set_columns(
            {
                "position": position,
                "ancestral_state": ancestral_state,
                "ancestral_state_offset": ancestral_state_offset,
                "metadata": metadata,
                "metadata_offset": metadata_offset,
                "metadata_schema": metadata_schema,
            }
        )

    def append_columns(
        self,
        position,
        ancestral_state,
        ancestral_state_offset,
        metadata=None,
        metadata_offset=None,
    ):
        """
        Appends the specified arrays to the end of the columns of this
        :class:`SiteTable`. This allows many new rows to be added at once.

        The ``position``, ``ancestral_state`` and ``ancestral_state_offset``
        parameters are mandatory, and must be 1D numpy arrays. The length of
        the ``position`` array determines the number of additional rows to
        add to the table.
        The ``ancestral_state`` and ``ancestral_state_offset`` parameters
        must be supplied together, and meet the requirements for
        :ref:`sec_encoding_ragged_columns` (see
        :ref:`sec_tables_api_text_columns` for more information). The
        ``metadata`` and ``metadata_offset`` parameters must be supplied
        together, and meet the requirements for
        :ref:`sec_encoding_ragged_columns` (see
        :ref:`sec_tables_api_binary_columns` for more information) and
        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare
        metadata.

        :param position: The position of each site in genome coordinates.
        :type position: numpy.ndarray, dtype=np.float64
        :param ancestral_state: The flattened ancestral_state array. Required.
        :type ancestral_state: numpy.ndarray, dtype=np.int8
        :param ancestral_state_offset: The offsets into the
            ``ancestral_state`` array.
        :type ancestral_state_offset: numpy.ndarray, dtype=np.uint32.
        :param metadata: The flattened metadata array. Must be specified along
            with ``metadata_offset``. If not specified or None, an empty
            metadata value is stored for each site.
        :type metadata: numpy.ndarray, dtype=np.int8
        :param metadata_offset: The offsets into the ``metadata`` array.
        :type metadata_offset: numpy.ndarray, dtype=np.uint32.
        """
        self.ll_table.append_columns(
            {
                "position": position,
                "ancestral_state": ancestral_state,
                "ancestral_state_offset": ancestral_state_offset,
                "metadata": metadata,
                "metadata_offset": metadata_offset,
            }
        )

    def packset_ancestral_state(self, ancestral_states):
        """
        Packs the specified list of ancestral_state values and updates the
        ``ancestral_state`` and ``ancestral_state_offset`` columns. The
        length of the ancestral_states list must be equal to the number of
        rows in the table.

        :param list(str) ancestral_states: A list of string ancestral state
            values.
        """
        packed, offset = util.pack_strings(ancestral_states)
        columns = self.asdict()
        columns["ancestral_state"] = packed
        columns["ancestral_state_offset"] = offset
        # Rewrite the whole table so the other columns are preserved.
        self.set_columns(**columns)
2191
+
2192
+
2193
class MutationTable(MutableMetadataTable):
    """
    A table defining the mutations in a tree sequence. See the
    :ref:`definitions <sec_mutation_table_definition>` for details on the
    columns in this table and the
    :ref:`tree sequence requirements <sec_valid_tree_sequence_requirements>`
    section for the properties needed for a mutation table to be a part of a
    valid tree sequence.

    .. include:: substitutions/table_edit_warning.rst

    :ivar site: The array of site IDs.
    :vartype site: numpy.ndarray, dtype=np.int32
    :ivar node: The array of node IDs.
    :vartype node: numpy.ndarray, dtype=np.int32
    :ivar time: The array of time values.
    :vartype time: numpy.ndarray, dtype=np.float64
    :ivar derived_state: The flattened array of derived state strings.
        See :ref:`sec_tables_api_text_columns` for more details.
    :vartype derived_state: numpy.ndarray, dtype=np.int8
    :ivar derived_state_offset: The offsets of rows in the derived_state
        array. See :ref:`sec_tables_api_text_columns` for more details.
    :vartype derived_state_offset: numpy.ndarray, dtype=np.uint32
    :ivar parent: The array of parent mutation IDs.
    :vartype parent: numpy.ndarray, dtype=np.int32
    :ivar metadata: The flattened array of binary metadata values. See
        :ref:`sec_tables_api_binary_columns` for more details.
    :vartype metadata: numpy.ndarray, dtype=np.int8
    :ivar metadata_offset: The array of offsets into the metadata column. See
        :ref:`sec_tables_api_binary_columns` for more details.
    :vartype metadata_offset: numpy.ndarray, dtype=np.uint32
    :ivar metadata_schema: The metadata schema for this table's metadata column
    :vartype metadata_schema: tskit.MetadataSchema
    """

    table_name = "mutations"
    column_names = [
        "site",
        "node",
        "time",
        "derived_state",
        "derived_state_offset",
        "parent",
        "metadata",
        "metadata_offset",
    ]

    def __init__(self, max_rows_increment=0, ll_table=None):
        # Create a fresh low-level table unless the caller supplies one.
        if ll_table is None:
            ll_table = _tskit.MutationTable(max_rows_increment=max_rows_increment)
        super().__init__(ll_table, MutationTableRow)

    def add_row(self, site, node, derived_state, parent=-1, metadata=None, time=None):
        """
        Adds a new row to this :class:`MutationTable` and returns the ID of
        the corresponding mutation. Metadata, if specified, will be validated
        and encoded according to the table's
        :attr:`metadata_schema<tskit.MutationTable.metadata_schema>`.

        :param int site: The ID of the site that this mutation occurs at.
        :param int node: The ID of the first node inheriting this mutation.
        :param str derived_state: The state of the site at this mutation's
            node.
        :param int parent: The ID of the parent mutation. If not specified,
            defaults to :attr:`NULL`.
        :param object metadata: Any object that is valid metadata for the
            table's schema. Defaults to the default metadata value for the
            table's schema. This is typically ``{}``. For no schema, ``None``.
        :param float time: The occurrence time for the new mutation. If not
            specified, defaults to ``UNKNOWN_TIME``, indicating the time is
            unknown.
        :return: The ID of the newly added mutation.
        :rtype: int
        """
        schema = self.metadata_schema
        # Substitute the schema's empty value when no metadata is given, then
        # validate/encode in a single step.
        encoded = schema.validate_and_encode_row(
            schema.empty_value if metadata is None else metadata
        )
        return self.ll_table.add_row(
            site,
            node,
            derived_state,
            parent,
            encoded,
            UNKNOWN_TIME if time is None else time,
        )

    def set_columns(
        self,
        site=None,
        node=None,
        time=None,
        derived_state=None,
        derived_state_offset=None,
        parent=None,
        metadata=None,
        metadata_offset=None,
        metadata_schema=None,
    ):
        """
        Sets the values for each column in this :class:`MutationTable` using
        the values in the specified arrays. Overwrites existing data in all
        the table columns.

        The ``site``, ``node``, ``derived_state`` and ``derived_state_offset``
        parameters are mandatory, and must be 1D numpy arrays. The ``site``
        and ``node`` (also ``parent`` and ``time``, if supplied) arrays must
        be of equal length, and determine the number of rows in the table.
        The ``derived_state`` and ``derived_state_offset`` parameters must be
        supplied together, and meet the requirements for
        :ref:`sec_encoding_ragged_columns` (see
        :ref:`sec_tables_api_text_columns` for more information). The
        ``metadata`` and ``metadata_offset`` parameters must be supplied
        together, and meet the requirements for
        :ref:`sec_encoding_ragged_columns` (see
        :ref:`sec_tables_api_binary_columns` for more information) and
        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare
        metadata.

        :param site: The ID of the site each mutation occurs at.
        :type site: numpy.ndarray, dtype=np.int32
        :param node: The ID of the node each mutation is associated with.
        :type node: numpy.ndarray, dtype=np.int32
        :param time: The time values for each mutation.
        :type time: numpy.ndarray, dtype=np.float64
        :param derived_state: The flattened derived_state array. Required.
        :type derived_state: numpy.ndarray, dtype=np.int8
        :param derived_state_offset: The offsets into the ``derived_state``
            array.
        :type derived_state_offset: numpy.ndarray, dtype=np.uint32.
        :param parent: The ID of the parent mutation for each mutation.
        :type parent: numpy.ndarray, dtype=np.int32
        :param metadata: The flattened metadata array. Must be specified along
            with ``metadata_offset``. If not specified or None, an empty
            metadata value is stored for each mutation.
        :type metadata: numpy.ndarray, dtype=np.int8
        :param metadata_offset: The offsets into the ``metadata`` array.
        :type metadata_offset: numpy.ndarray, dtype=np.uint32.
        :param metadata_schema: The encoded metadata schema. If None (default)
            do not overwrite the existing schema. Note that a schema will need
            to be encoded as a string, e.g. via ``repr(new_metadata_schema)``.
        :type metadata_schema: str
        """
        # Raise a TypeError naming any mandatory column left as None.
        self._check_required_args(
            site=site,
            node=node,
            derived_state=derived_state,
            derived_state_offset=derived_state_offset,
        )
        self.ll_table.set_columns(
            {
                "site": site,
                "node": node,
                "parent": parent,
                "time": time,
                "derived_state": derived_state,
                "derived_state_offset": derived_state_offset,
                "metadata": metadata,
                "metadata_offset": metadata_offset,
                "metadata_schema": metadata_schema,
            }
        )

    def append_columns(
        self,
        site,
        node,
        derived_state,
        derived_state_offset,
        parent=None,
        time=None,
        metadata=None,
        metadata_offset=None,
    ):
        """
        Appends the specified arrays to the end of the columns of this
        :class:`MutationTable`. This allows many new rows to be added at once.

        The ``site``, ``node``, ``derived_state`` and ``derived_state_offset``
        parameters are mandatory, and must be 1D numpy arrays. The ``site``
        and ``node`` (also ``time`` and ``parent``, if supplied) arrays must
        be of equal length, and determine the number of additional rows to
        add to the table.
        The ``derived_state`` and ``derived_state_offset`` parameters must be
        supplied together, and meet the requirements for
        :ref:`sec_encoding_ragged_columns` (see
        :ref:`sec_tables_api_text_columns` for more information). The
        ``metadata`` and ``metadata_offset`` parameters must be supplied
        together, and meet the requirements for
        :ref:`sec_encoding_ragged_columns` (see
        :ref:`sec_tables_api_binary_columns` for more information) and
        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare
        metadata.

        :param site: The ID of the site each mutation occurs at.
        :type site: numpy.ndarray, dtype=np.int32
        :param node: The ID of the node each mutation is associated with.
        :type node: numpy.ndarray, dtype=np.int32
        :param time: The time values for each mutation.
        :type time: numpy.ndarray, dtype=np.float64
        :param derived_state: The flattened derived_state array. Required.
        :type derived_state: numpy.ndarray, dtype=np.int8
        :param derived_state_offset: The offsets into the ``derived_state``
            array.
        :type derived_state_offset: numpy.ndarray, dtype=np.uint32.
        :param parent: The ID of the parent mutation for each mutation.
        :type parent: numpy.ndarray, dtype=np.int32
        :param metadata: The flattened metadata array. Must be specified along
            with ``metadata_offset``. If not specified or None, an empty
            metadata value is stored for each mutation.
        :type metadata: numpy.ndarray, dtype=np.int8
        :param metadata_offset: The offsets into the ``metadata`` array.
        :type metadata_offset: numpy.ndarray, dtype=np.uint32.
        """
        self.ll_table.append_columns(
            {
                "site": site,
                "node": node,
                "time": time,
                "parent": parent,
                "derived_state": derived_state,
                "derived_state_offset": derived_state_offset,
                "metadata": metadata,
                "metadata_offset": metadata_offset,
            }
        )

    def packset_derived_state(self, derived_states):
        """
        Packs the specified list of derived_state values and updates the
        ``derived_state`` and ``derived_state_offset`` columns. The length of
        the derived_states list must be equal to the number of rows in the
        table.

        :param list(str) derived_states: A list of string derived state
            values.
        """
        packed, offset = util.pack_strings(derived_states)
        columns = self.asdict()
        columns["derived_state"] = packed
        columns["derived_state_offset"] = offset
        # Rewrite the whole table so the other columns are preserved.
        self.set_columns(**columns)

    def keep_rows(self, keep):
        """
        .. include:: substitutions/table_keep_rows_main.rst

        The values in the ``parent`` column are updated according to this
        map, so that reference integrity within the table is maintained.
        As a consequence of this, the values in the ``parent`` column
        for kept rows are bounds-checked and an error raised if they
        are not valid. Rows that are deleted are not checked for
        parent ID integrity.

        If an attempt is made to delete rows that are referred to by
        the ``parent`` column of rows that are retained, an error
        is raised.

        These error conditions are checked before any alterations to
        the table are made.

        :param array-like keep: The rows to keep as a boolean array. Must
            be the same length as the table, and convertible to a numpy
            array of dtype bool.
        :return: The mapping between old and new row IDs as a numpy
            array (dtype int32).
        :rtype: numpy.ndarray (dtype=np.int32)
        """
        # Overridden purely to attach the mutation-specific documentation
        # about parent-ID remapping; behavior is the generic implementation.
        return super().keep_rows(keep)
2453
+
2454
+
2455
class PopulationTable(MutableMetadataTable):
    """
    A table defining the populations referred to in a tree sequence.
    The PopulationTable stores metadata for populations that may be referred to
    in the :class:`NodeTable` and :class:`MigrationTable`. Note that although
    nodes may be associated with populations, this association is stored in
    the :class:`NodeTable`: only metadata on each population is stored
    in the population table.

    .. include:: substitutions/table_edit_warning.rst

    :ivar metadata: The flattened array of binary metadata values. See
        :ref:`sec_tables_api_binary_columns` for more details.
    :vartype metadata: numpy.ndarray, dtype=np.int8
    :ivar metadata_offset: The array of offsets into the metadata column. See
        :ref:`sec_tables_api_binary_columns` for more details.
    :vartype metadata_offset: numpy.ndarray, dtype=np.uint32
    :ivar metadata_schema: The metadata schema for this table's metadata column
    :vartype metadata_schema: tskit.MetadataSchema
    """

    table_name = "populations"
    column_names = ["metadata", "metadata_offset"]

    def __init__(self, max_rows_increment=0, ll_table=None):
        if ll_table is None:
            ll_table = _tskit.PopulationTable(max_rows_increment=max_rows_increment)
        super().__init__(ll_table, PopulationTableRow)

    def add_row(self, metadata=None):
        """
        Adds a new row to this :class:`PopulationTable` and returns the ID of the
        corresponding population. Metadata, if specified, will be validated and encoded
        according to the table's
        :attr:`metadata_schema<tskit.PopulationTable.metadata_schema>`.

        :param object metadata: Any object that is valid metadata for the table's schema.
            Defaults to the default metadata value for the table's schema. This is
            typically ``{}``. For no schema, ``None``.
        :return: The ID of the newly added population.
        :rtype: int
        """
        if metadata is None:
            metadata = self.metadata_schema.empty_value
        metadata = self.metadata_schema.validate_and_encode_row(metadata)
        return self.ll_table.add_row(metadata=metadata)

    def set_columns(self, metadata=None, metadata_offset=None, metadata_schema=None):
        """
        Sets the values for each column in this :class:`PopulationTable` using the
        values in the specified arrays. Overwrites existing data in all the table
        columns.

        The ``metadata`` and ``metadata_offset`` parameters must be supplied
        together, and meet the requirements for
        :ref:`sec_encoding_ragged_columns` (see
        :ref:`sec_tables_api_binary_columns` for more information) and
        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare metadata.

        :param metadata: The flattened metadata array. Must be specified along
            with ``metadata_offset``. If not specified or None, an empty metadata
            value is stored for each population.
        :type metadata: numpy.ndarray, dtype=np.int8
        :param metadata_offset: The offsets into the ``metadata`` array.
        :type metadata_offset: numpy.ndarray, dtype=np.uint32.
        :param metadata_schema: The encoded metadata schema. If None (default)
            do not overwrite the existing schema. Note that a schema will need to be
            encoded as a string, e.g. via ``repr(new_metadata_schema)``.
        :type metadata_schema: str
        """
        self.ll_table.set_columns(
            dict(
                metadata=metadata,
                metadata_offset=metadata_offset,
                metadata_schema=metadata_schema,
            )
        )

    def append_columns(self, metadata=None, metadata_offset=None):
        """
        Appends the specified arrays to the end of the columns of this
        :class:`PopulationTable`. This allows many new rows to be added at once.

        The ``metadata`` and ``metadata_offset`` parameters must be supplied
        together, and meet the requirements for
        :ref:`sec_encoding_ragged_columns` (see
        :ref:`sec_tables_api_binary_columns` for more information) and
        :ref:`sec_tutorial_metadata_bulk` for an example of how to prepare metadata.

        :param metadata: The flattened metadata array. Must be specified along
            with ``metadata_offset``. If not specified or None, an empty metadata
            value is stored for each population.
        :type metadata: numpy.ndarray, dtype=np.int8
        :param metadata_offset: The offsets into the ``metadata`` array.
        :type metadata_offset: numpy.ndarray, dtype=np.uint32.
        """
        self.ll_table.append_columns(
            dict(metadata=metadata, metadata_offset=metadata_offset)
        )
2554
+
2555
+
2556
class ProvenanceTable(MutableBaseTable):
    """
    A table recording the provenance (i.e., history) of this table, so that the
    origin of the underlying data and sequence of subsequent operations can be
    traced. Each row contains a "record" string (recommended format: JSON) and
    a timestamp.

    .. todo::
        The format of the `record` field will be more precisely specified in
        the future.

    :ivar record: The flattened array containing the record strings.
        :ref:`sec_tables_api_text_columns` for more details.
    :vartype record: numpy.ndarray, dtype=np.int8
    :ivar record_offset: The array of offsets into the record column. See
        :ref:`sec_tables_api_text_columns` for more details.
    :vartype record_offset: numpy.ndarray, dtype=np.uint32
    :ivar timestamp: The flattened array containing the timestamp strings.
        :ref:`sec_tables_api_text_columns` for more details.
    :vartype timestamp: numpy.ndarray, dtype=np.int8
    :ivar timestamp_offset: The array of offsets into the timestamp column. See
        :ref:`sec_tables_api_text_columns` for more details.
    :vartype timestamp_offset: numpy.ndarray, dtype=np.uint32
    """

    table_name = "provenances"
    column_names = ["record", "record_offset", "timestamp", "timestamp_offset"]

    def __init__(self, max_rows_increment=0, ll_table=None):
        ll = ll_table
        if ll is None:
            ll = _tskit.ProvenanceTable(max_rows_increment=max_rows_increment)
        super().__init__(ll, ProvenanceTableRow)

    def add_row(self, record, timestamp=None):
        """
        Adds a new row to this ProvenanceTable consisting of the specified record and
        timestamp. If timestamp is not specified, it is automatically generated from
        the current time.

        :param str record: A provenance record, describing the parameters and
            environment used to generate the current set of tables.
        :param str timestamp: A string timestamp. This should be in ISO8601 form.
        """
        # The positional argument order here is deliberately reversed relative
        # to the low-level module: the common case is stamping a record with
        # "now", so the timestamp is optional and generated on demand.
        if timestamp is None:
            timestamp = datetime.datetime.now().isoformat()
        return self.ll_table.add_row(record=record, timestamp=timestamp)

    def set_columns(
        self, timestamp=None, timestamp_offset=None, record=None, record_offset=None
    ):
        """
        Sets the values for each column in this :class:`ProvenanceTable` using the
        values in the specified arrays. Overwrites existing data in all the table
        columns.

        The ``timestamp`` and ``timestamp_offset`` parameters must be supplied
        together, and meet the requirements for
        :ref:`sec_encoding_ragged_columns` (see
        :ref:`sec_tables_api_binary_columns` for more information). Likewise
        for the ``record`` and ``record_offset`` columns

        :param timestamp: The flattened timestamp array. Must be specified along
            with ``timestamp_offset``. If not specified or None, an empty timestamp
            value is stored for each row.
        :type timestamp: numpy.ndarray, dtype=np.int8
        :param timestamp_offset: The offsets into the ``timestamp`` array.
        :type timestamp_offset: numpy.ndarray, dtype=np.uint32.
        :param record: The flattened record array. Must be specified along
            with ``record_offset``. If not specified or None, an empty record
            value is stored for each row.
        :type record: numpy.ndarray, dtype=np.int8
        :param record_offset: The offsets into the ``record`` array.
        :type record_offset: numpy.ndarray, dtype=np.uint32.
        """
        self.ll_table.set_columns(
            {
                "timestamp": timestamp,
                "timestamp_offset": timestamp_offset,
                "record": record,
                "record_offset": record_offset,
            }
        )

    def append_columns(
        self, timestamp=None, timestamp_offset=None, record=None, record_offset=None
    ):
        """
        Appends the specified arrays to the end of the columns of this
        :class:`ProvenanceTable`. This allows many new rows to be added at once.

        The ``timestamp`` and ``timestamp_offset`` parameters must be supplied
        together, and meet the requirements for
        :ref:`sec_encoding_ragged_columns` (see
        :ref:`sec_tables_api_binary_columns` for more information). Likewise
        for the ``record`` and ``record_offset`` columns

        :param timestamp: The flattened timestamp array. Must be specified along
            with ``timestamp_offset``. If not specified or None, an empty timestamp
            value is stored for each row.
        :type timestamp: numpy.ndarray, dtype=np.int8
        :param timestamp_offset: The offsets into the ``timestamp`` array.
        :type timestamp_offset: numpy.ndarray, dtype=np.uint32.
        :param record: The flattened record array. Must be specified along
            with ``record_offset``. If not specified or None, an empty record
            value is stored for each row.
        :type record: numpy.ndarray, dtype=np.int8
        :param record_offset: The offsets into the ``record`` array.
        :type record_offset: numpy.ndarray, dtype=np.uint32.
        """
        self.ll_table.append_columns(
            {
                "timestamp": timestamp,
                "timestamp_offset": timestamp_offset,
                "record": record,
                "record_offset": record_offset,
            }
        )

    def packset_record(self, records):
        """
        Packs the specified list of record values and updates the
        ``record`` and ``record_offset`` columns. The length
        of the records array must be equal to the number of rows in
        the table.

        :param list(str) records: A list of string record values.
        """
        columns = self.asdict()
        columns["record"], columns["record_offset"] = util.pack_strings(records)
        self.set_columns(**columns)

    def packset_timestamp(self, timestamps):
        """
        Packs the specified list of timestamp values and updates the
        ``timestamp`` and ``timestamp_offset`` columns. The length
        of the timestamps array must be equal to the number of rows in
        the table.

        :param list(str) timestamps: A list of string timestamp values.
        """
        columns = self.asdict()
        columns["timestamp"], columns["timestamp_offset"] = util.pack_strings(
            timestamps
        )
        self.set_columns(**columns)

    def equals(self, other, ignore_timestamps=False):
        """
        Returns True if `self` and `other` are equal. By default, two provenance
        tables are considered equal if their columns are byte-for-byte identical.

        :param other: Another provenance table instance
        :param bool ignore_timestamps: If True exclude the timestamp column
            from the comparison.
        :return: True if other is equal to this provenance table; False otherwise.
        :rtype: bool
        """
        # Delegate to the shared comparison machinery in the base class.
        return self._equals_internal(other, ignore_timestamps=ignore_timestamps)

    def assert_equals(self, other, *, ignore_timestamps=False):
        """
        Raise an AssertionError for the first found difference between
        this and another provenance table.

        :param other: Another provenance table instance
        :param bool ignore_timestamps: If True exclude the timestamp column
            from the comparison.
        """
        self._assert_equals_internal(other, ignore_timestamps=ignore_timestamps)
2731
+
2732
+
2733
# We define segment ordering by (left, right, node) tuples
@dataclasses.dataclass(eq=True, order=True)
class IdentitySegment:
    """
    A single segment of identity by descent spanning a genomic interval
    for a specific ancestor node.
    """

    # NOTE: the field declaration order matters here: the dataclass-generated
    # comparison methods order instances lexicographically as
    # (left, right, node) tuples, as noted in the comment above.
    left: float
    """The left genomic coordinate (inclusive)."""
    right: float
    """The right genomic coordinate (exclusive)."""
    node: int
    """The ID of the most recent common ancestor node."""

    @property
    def span(self) -> float:
        """
        The length of the genomic region spanned by this identity segment.
        """
        return self.right - self.left
2754
+
2755
+
2756
class IdentitySegmentList(collections.abc.Iterable, collections.abc.Sized):
    """
    A summary of identity-by-descent segments for some pair of samples in a
    :class:`.IdentitySegments` result. If the ``store_segments`` argument
    has been specified to :meth:`.TreeSequence.ibd_segments`, this class
    can be treated as a sequence of :class:`.IdentitySegment` objects.

    Access to the segment data via numpy arrays is also available via
    the :attr:`.IdentitySegmentList.left`, :attr:`.IdentitySegmentList.right`
    and :attr:`.IdentitySegmentList.node` attributes.

    If ``store_segments`` is False, only the overall summary values
    such as :attr:`.IdentitySegmentList.total_span` and ``len()`` are
    available. Attempting to iterate over the list or access per-segment
    arrays (``left``, ``right``, or ``node``) in this case will raise an
    ``IdentitySegmentsNotStoredError``.

    .. warning:: The order of segments within an IdentitySegmentList is
        arbitrary and may change in the future

    """

    def __init__(self, ll_segment_list):
        self._ll_segment_list = ll_segment_list

    def __iter__(self):
        # Build one IdentitySegment per (left, right, node) triple, converting
        # from numpy scalars to plain Python types.
        for triple in zip(self.left, self.right, self.node):
            yield IdentitySegment(float(triple[0]), float(triple[1]), int(triple[2]))

    def __len__(self):
        return self._ll_segment_list.num_segments

    def __str__(self):
        return (
            "IdentitySegmentList("
            f"num_segments={len(self)}, total_span={self.total_span})"
        )

    def __repr__(self):
        return f"IdentitySegmentList({repr(list(self))})"

    def __eq__(self, other):
        if isinstance(other, IdentitySegmentList):
            return list(self) == list(other)
        return False

    @property
    def total_span(self):
        """
        The total genomic span covered by segments in this list. Equal to
        ``sum(seg.span for seg in seglst)``.
        """
        return self._ll_segment_list.total_span

    @property
    def left(self):
        """
        A numpy array (dtype=np.float64) of the ``left`` coordinates of segments.
        """
        return self._ll_segment_list.left

    @property
    def right(self):
        """
        A numpy array (dtype=np.float64) of the ``right`` coordinates of segments.
        """
        return self._ll_segment_list.right

    @property
    def node(self):
        """
        A numpy array (dtype=np.int32) of the MRCA node IDs in segments.
        """
        return self._ll_segment_list.node
2830
+
2831
+
2832
class IdentitySegments(collections.abc.Mapping):
    """
    A class summarising and optionally storing the segments of identity
    by descent returned by :meth:`.TreeSequence.ibd_segments`. See the
    :ref:`sec_identity` for more information and examples.

    Along with the documented methods and attributes, the class supports
    the Python mapping protocol, and can be regarded as a dictionary
    mapping sample node pair tuples to the corresponding
    :class:`.IdentitySegmentList`.

    .. note:: It is important to note that the facilities available
        for a given instance of this class are determined by the
        ``store_pairs`` and ``store_segments`` arguments provided to
        :meth:`.TreeSequence.ibd_segments`. For example, attempting
        to access per-sample pair information (such as indexing with
        ``[(a, b)]``, iterating over the mapping, or accessing
        :attr:`.IdentitySegments.pairs`) if ``store_pairs`` is False will
        result in an ``IdentityPairsNotStoredError`` being raised.

    .. warning:: This class should not be instantiated directly.
    """

    def __init__(self, ll_result, *, max_time, min_span, store_segments, store_pairs):
        self._ll_identity_segments = ll_result
        # The parameters used for the ibd_segments() call, kept for display.
        self.max_time = max_time
        self.min_span = min_span
        self.store_segments = store_segments
        self.store_pairs = store_pairs

    @property
    def num_segments(self):
        """
        The total number of identity segments found.
        """
        return self._ll_identity_segments.num_segments

    @property
    def num_pairs(self):
        """
        The total number of distinct sample pairs for which identity
        segments were found. (Only available when ``store_pairs`` or
        ``store_segments`` is specified).
        """
        return self._ll_identity_segments.num_pairs

    @property
    def total_span(self):
        """
        The total genomic sequence length spanned by all identity
        segments that were found.
        """
        return self._ll_identity_segments.total_span

    @property
    def pairs(self):
        """
        A numpy array with shape ``(segs.num_pairs, 2)`` and dtype=np.int32
        containing the sample pairs for which IBD segments were found.
        """
        return self._ll_identity_segments.get_keys()

    # We have two different versions of repr - one where we list out the segments
    # for debugging, and the other that just shows the standard representation.
    # We could have repr fail if store_segments isn't true, but then printing,
    # e.g., a list of IdentitySegments objects would fail unexpectedly.
    def __repr__(self):
        if not self.store_segments:
            return super().__repr__()
        return f"IdentitySegments({dict(self)})"

    def __str__(self):
        # TODO it would be nice to add horizontal lines as
        # table separators to distinguish the two parts of the
        # table like suggested here:
        # https://github.com/tskit-dev/tskit/pull/1902#issuecomment-989943424
        rows = [["Parameters:", ""]]
        for name in ("max_time", "min_span", "store_pairs", "store_segments"):
            rows.append([name, str(getattr(self, name))])
        rows.append(["Results:", ""])
        rows.append(["num_segments", str(self.num_segments)])
        rows.append(["total_span", str(self.total_span)])
        if self.store_pairs:
            rows.append(["num_pairs", str(len(self))])
        return util.unicode_table(rows, title="IdentitySegments", row_separator=False)

    def __getitem__(self, key):
        sample_a, sample_b = key
        segment_list = self._ll_identity_segments.get(sample_a, sample_b)
        return IdentitySegmentList(segment_list)

    def __iter__(self):
        for pair in self._ll_identity_segments.get_keys():
            yield tuple(pair)

    def __len__(self):
        return self.num_pairs
2931
+
2932
+
2933
# TODO move to reference_sequence.py when we start adding more functionality.
class ReferenceSequence(metadata.MetadataProvider):
    """
    The :ref:`reference sequence<sec_data_model_reference_sequence>` associated
    with a given :class:`.TableCollection` or :class:`.TreeSequence`.

    Metadata concerning reference sequences can be described using the
    :attr:`.metadata_schema` and stored in the :attr:`.metadata` attribute.
    See the :ref:`examples<sec_metadata_examples_reference_sequence>` for
    idiomatic usage.

    .. warning:: This API is preliminary and currently only supports accessing
        reference sequence information via the ``.data`` attribute. Future versions
        will also enable transparent fetching of known reference sequences
        from a URL (see https://github.com/tskit-dev/tskit/issues/2022).
    """

    def __init__(self, ll_reference_sequence):
        # The base class handles the metadata/metadata_schema machinery;
        # we keep our own reference for the data/url accessors below.
        super().__init__(ll_reference_sequence)
        self._ll_reference_sequence = ll_reference_sequence

    def is_null(self) -> bool:
        """
        Returns True if this :class:`.ReferenceSequence` is null, i.e.,
        all fields are empty.
        """
        return bool(self._ll_reference_sequence.is_null())

    def clear(self):
        # Reset every field to its empty state. Assignments go through the
        # property setters, writing directly to the low-level object.
        # NOTE(review): metadata/metadata_schema setters are presumably
        # provided by MetadataProvider — confirm against that base class.
        self.data = ""
        self.url = ""
        self.metadata_schema = tskit.MetadataSchema(None)
        self.metadata = b""

    # https://github.com/tskit-dev/tskit/issues/1984
    # TODO add a __str__ method
    # TODO add a _repr_html_
    # FIXME This is a shortcut, we want to put the values in explicitly
    # here to get more control over how they are displayed.
    def __repr__(self):
        return f"ReferenceSequence({repr(self.asdict())})"

    @property
    def data(self) -> str:
        """
        The string encoding of the reference sequence such that ``data[j]``
        represents the reference nucleotide at base ``j``. If this reference
        sequence is writable, the value can be assigned, e.g.
        ``tables.reference_sequence.data = "ACGT"``
        """
        return self._ll_reference_sequence.data

    @data.setter
    def data(self, value):
        self._ll_reference_sequence.data = value

    @property
    def url(self) -> str:
        # The URL from which the reference sequence may be fetched (see the
        # class-level warning: transparent fetching is not yet implemented).
        return self._ll_reference_sequence.url

    @url.setter
    def url(self, value):
        self._ll_reference_sequence.url = value

    def asdict(self) -> dict:
        # Dictionary form used for interchange and by __repr__. The schema is
        # serialised via repr(), matching the table-level convention.
        return {
            "metadata_schema": repr(self.metadata_schema),
            "metadata": self.metadata_bytes,
            "data": self.data,
            "url": self.url,
        }

    def __eq__(self, other):
        return self.equals(other)

    def equals(self, other, ignore_metadata=False):
        # equals() is implemented in terms of assert_equals() so the two
        # comparison methods cannot drift apart.
        try:
            self.assert_equals(other, ignore_metadata)
            return True
        except AssertionError:
            return False

    def assert_equals(self, other, ignore_metadata=False):
        # Metadata (and schema) comparison is delegated to the base class;
        # data and url are compared field-by-field below with specific
        # error messages.
        if not ignore_metadata:
            super().assert_equals(other)

        if self.data != other.data:
            raise AssertionError(
                f"Reference sequence data differs: self={self.data} "
                f"other={other.data}"
            )
        if self.url != other.url:
            raise AssertionError(
                f"Reference sequence url differs: self={self.url} " f"other={other.url}"
            )

    @property
    def nbytes(self):
        # TODO this will be inefficient when we work with large references.
        # Make a dedicated low-level method for getting the length of data.
        return super().nbytes + len(self.url) + len(self.data)
3034
+
3035
+
3036
+ class TableCollection(metadata.MetadataProvider):
3037
+ """
3038
+ A collection of mutable tables defining a tree sequence. See the
3039
+ :ref:`sec_data_model` section for definition on the various tables
3040
+ and how they together define a :class:`TreeSequence`. Arbitrary
3041
+ data can be stored in a TableCollection, but there are certain
3042
+ :ref:`requirements <sec_valid_tree_sequence_requirements>` that must be
3043
+ satisfied for these tables to be interpreted as a tree sequence.
3044
+
3045
+ To obtain an immutable :class:`TreeSequence` instance corresponding to the
3046
+ current state of a ``TableCollection``, please use the :meth:`.tree_sequence`
3047
+ method.
3048
+ """
3049
+
3050
    # Error message shared by all the table property setters below: tables
    # themselves cannot be reassigned, only modified in place.
    set_err_text = (
        "Cannot set tables in a table collection: use table.replace_with() instead."
    )

    def __init__(self, sequence_length=0, *, ll_tables=None):
        # Wrap an existing low-level table collection when given; otherwise
        # create a fresh empty one with the specified sequence length.
        self._ll_tables = ll_tables
        if ll_tables is None:
            self._ll_tables = _tskit.TableCollection(sequence_length)
        super().__init__(self._ll_tables)
        # Each Python-level table wraps the corresponding low-level table,
        # sharing its state (a view, not a copy).
        self._individuals = IndividualTable(ll_table=self._ll_tables.individuals)
        self._nodes = NodeTable(ll_table=self._ll_tables.nodes)
        self._edges = EdgeTable(ll_table=self._ll_tables.edges)
        self._migrations = MigrationTable(ll_table=self._ll_tables.migrations)
        self._sites = SiteTable(ll_table=self._ll_tables.sites)
        self._mutations = MutationTable(ll_table=self._ll_tables.mutations)
        self._populations = PopulationTable(ll_table=self._ll_tables.populations)
        self._provenances = ProvenanceTable(ll_table=self._ll_tables.provenances)
3067
+
3068
    # Read-only accessors for the constituent tables. Each setter raises
    # AttributeError: tables must be edited in place (or replaced via
    # table.replace_with()), never reassigned.
    @property
    def individuals(self) -> IndividualTable:
        """
        The :ref:`sec_individual_table_definition` in this collection.
        """
        return self._individuals

    @individuals.setter
    def individuals(self, value):
        raise AttributeError(self.set_err_text)

    @property
    def nodes(self) -> NodeTable:
        """
        The :ref:`sec_node_table_definition` in this collection.
        """
        return self._nodes

    @nodes.setter
    def nodes(self, value):
        raise AttributeError(self.set_err_text)

    @property
    def edges(self) -> EdgeTable:
        """
        The :ref:`sec_edge_table_definition` in this collection.
        """
        return self._edges

    @edges.setter
    def edges(self, value):
        raise AttributeError(self.set_err_text)

    @property
    def migrations(self) -> MigrationTable:
        """
        The :ref:`sec_migration_table_definition` in this collection.
        """
        return self._migrations

    @migrations.setter
    def migrations(self, value):
        raise AttributeError(self.set_err_text)

    @property
    def sites(self) -> SiteTable:
        """
        The :ref:`sec_site_table_definition` in this collection.
        """
        return self._sites

    @sites.setter
    def sites(self, value):
        raise AttributeError(self.set_err_text)

    @property
    def mutations(self) -> MutationTable:
        """
        The :ref:`sec_mutation_table_definition` in this collection.
        """
        return self._mutations

    @mutations.setter
    def mutations(self, value):
        raise AttributeError(self.set_err_text)

    @property
    def populations(self) -> PopulationTable:
        """
        The :ref:`sec_population_table_definition` in this collection.
        """
        return self._populations

    @populations.setter
    def populations(self, value):
        raise AttributeError(self.set_err_text)

    @property
    def provenances(self) -> ProvenanceTable:
        """
        The :ref:`sec_provenance_table_definition` in this collection.
        """
        return self._provenances

    @provenances.setter
    def provenances(self, value):
        raise AttributeError(self.set_err_text)
3155
+
3156
    @property
    def indexes(self) -> TableCollectionIndexes:
        """
        The edge insertion and removal indexes.
        """
        indexes = self._ll_tables.indexes
        return TableCollectionIndexes(**indexes)

    @indexes.setter
    def indexes(self, indexes):
        # Unlike the table properties, indexes may be assigned directly;
        # the value is converted to its dict form for the low-level layer.
        self._ll_tables.indexes = indexes.asdict()

    @property
    def sequence_length(self) -> float:
        """
        The sequence length defining the coordinate space.
        """
        return self._ll_tables.sequence_length

    @sequence_length.setter
    def sequence_length(self, sequence_length):
        self._ll_tables.sequence_length = sequence_length
3178
+
3179
    @property
    def file_uuid(self) -> str:
        """
        The UUID for the file this TableCollection is derived
        from, or None if not derived from a file.
        """
        # Read-only: the UUID is assigned by the low-level layer on load.
        return self._ll_tables.file_uuid

    @property
    def time_units(self) -> str:
        """
        The units used for the time dimension of this TableCollection
        """
        return self._ll_tables.time_units

    @time_units.setter
    def time_units(self, time_units: str) -> None:
        self._ll_tables.time_units = time_units
3197
+
3198
    def has_reference_sequence(self):
        """
        Returns True if this :class:`.TableCollection` has an associated
        :ref:`reference sequence<sec_data_model_reference_sequence>`.
        """
        return bool(self._ll_tables.has_reference_sequence())

    @property
    def reference_sequence(self):
        """
        The :class:`.ReferenceSequence` associated with this :class:`.TableCollection`.

        .. note:: Note that the behaviour of this attribute differs from
            :attr:`.TreeSequence.reference_sequence` in that we return a valid
            instance of :class:`.ReferenceSequence` even when
            :attr:`.TableCollection.has_reference_sequence` is False. This is
            to allow us to update the state of the reference sequence.
        """
        # NOTE: arguably we should cache the reference to this object
        # during init, rather than creating a new instance each time.
        # However, following the pattern of the Table classes for now
        # for consistency.
        return ReferenceSequence(self._ll_tables.reference_sequence)

    @reference_sequence.setter
    def reference_sequence(self, value: ReferenceSequence):
        # Copy the value field-by-field through the wrapper's setters rather
        # than storing the object itself, so the low-level state is updated.
        self.reference_sequence.metadata_schema = value.metadata_schema
        self.reference_sequence.metadata = value.metadata
        self.reference_sequence.data = value.data
        self.reference_sequence.url = value.url
3228
+
3229
    def asdict(self, force_offset_64=False):
        """
        Returns the nested dictionary representation of this TableCollection
        used for interchange.

        Note: the semantics of this method changed at tskit 0.1.0. Previously a
        map of table names to the tables themselves was returned.

        :param bool force_offset_64: If True, all offset columns will have dtype
            np.uint64. If False (the default) the offset array columns will have
            a dtype of either np.uint32 or np.uint64, depending on the size of the
            corresponding data array.
        :return: The dictionary representation of this table collection.
        :rtype: dict
        """
        # The conversion is performed entirely by the low-level layer.
        return self._ll_tables.asdict(force_offset_64)
3245
+
3246
+ @property
3247
+ def table_name_map(self) -> dict:
3248
+ """
3249
+ Returns a dictionary mapping table names to the corresponding
3250
+ table instances. For example, the returned dictionary will contain the
3251
+ key "edges" that maps to an :class:`.EdgeTable` instance.
3252
+ """
3253
+ return {
3254
+ "edges": self.edges,
3255
+ "individuals": self.individuals,
3256
+ "migrations": self.migrations,
3257
+ "mutations": self.mutations,
3258
+ "nodes": self.nodes,
3259
+ "populations": self.populations,
3260
+ "provenances": self.provenances,
3261
+ "sites": self.sites,
3262
+ }
3263
+
3264
    @property
    def name_map(self) -> dict:
        # Deprecated in 0.4.1
        # Kept as a thin alias for backwards compatibility; new code should
        # use table_name_map directly.
        warnings.warn(
            "name_map is deprecated; use table_name_map instead",
            FutureWarning,
            stacklevel=4,
        )
        return self.table_name_map
3273
+
3274
    @property
    def nbytes(self) -> int:
        """
        Returns the total number of bytes required to store the data
        in this table collection. Note that this may not be equal to
        the actual memory footprint.
        """
        # Sum the fixed-size header fields plus the recursive sizes of the
        # metadata, indexes, reference sequence and all constituent tables.
        return sum(
            (
                8,  # sequence_length takes 8 bytes
                super().nbytes,  # metadata
                len(self.time_units.encode()),
                self.indexes.nbytes,
                self.reference_sequence.nbytes,
                sum(table.nbytes for table in self.table_name_map.values()),
            )
        )
3291
+
3292
def __str__(self):
    """
    Return a plain text summary of this TableCollection.
    """
    lines = [
        "TableCollection",
        "",
        f"Sequence Length: {self.sequence_length}",
        f"Time units: {self.time_units}",
        f"Metadata: {self.metadata}",
        "",
    ]
    # Each table is rendered under its own heading, in this fixed order.
    sections = (
        ("Individuals", self.individuals),
        ("Nodes", self.nodes),
        ("Edges", self.edges),
        ("Sites", self.sites),
        ("Mutations", self.mutations),
        ("Migrations", self.migrations),
        ("Populations", self.populations),
        ("Provenances", self.provenances),
    )
    for title, table in sections:
        lines.append(title)
        lines.append(str(table))
    return "\n".join(lines)
3322
+
3323
def equals(
    self,
    other,
    *,
    ignore_metadata=False,
    ignore_ts_metadata=False,
    ignore_provenance=False,
    ignore_timestamps=False,
    ignore_tables=False,
    ignore_reference_sequence=False,
):
    """
    Returns True if `self` and `other` are equal. By default, two table
    collections are considered equal if their ``sequence_length``
    properties are identical, their top-level tree sequence metadata and
    metadata schemas are byte-wise identical, and their constituent
    tables are byte-wise identical. Table indexes are never included in
    the comparison. The parameters relax parts of this definition.

    :param TableCollection other: Another table collection.
    :param bool ignore_metadata: If True *all* metadata and metadata
        schemas are excluded, including the top-level tree sequence and
        constituent table metadata (default=False).
    :param bool ignore_ts_metadata: If True the top-level tree sequence
        metadata and metadata schemas are excluded. No effect if
        ``ignore_metadata`` is True.
    :param bool ignore_provenance: If True the provenance tables are not
        included in the comparison.
    :param bool ignore_timestamps: If True the provenance timestamp
        column is ignored. No effect if ``ignore_provenance`` is True.
    :param bool ignore_tables: If True no tables are included, comparing
        only the top-level information.
    :param bool ignore_reference_sequence: If True the reference sequence
        is not included in the comparison.
    :return: True if other is equal to this table collection; False
        otherwise.
    :rtype: bool
    """
    if self is other:
        return True
    if type(other) is type(self):
        # Same concrete type: compare via the low-level library.
        return bool(
            self._ll_tables.equals(
                other._ll_tables,
                ignore_metadata=bool(ignore_metadata),
                ignore_ts_metadata=bool(ignore_ts_metadata),
                ignore_provenance=bool(ignore_provenance),
                ignore_timestamps=bool(ignore_timestamps),
                ignore_tables=bool(ignore_tables),
                ignore_reference_sequence=bool(ignore_reference_sequence),
            )
        )
    if hasattr(other, "_llts") and not hasattr(other, "_ll_tables"):
        # Presumably an ImmutableTableCollection (it wraps a low-level
        # tree sequence rather than tables); delegate the comparison.
        return other.equals(
            self,
            ignore_metadata=ignore_metadata,
            ignore_ts_metadata=ignore_ts_metadata,
            ignore_provenance=ignore_provenance,
            ignore_timestamps=ignore_timestamps,
            ignore_tables=ignore_tables,
            ignore_reference_sequence=ignore_reference_sequence,
        )
    return False
3395
+
3396
def assert_equals(
    self,
    other,
    *,
    ignore_metadata=False,
    ignore_ts_metadata=False,
    ignore_provenance=False,
    ignore_timestamps=False,
    ignore_tables=False,
    ignore_reference_sequence=False,
):
    """
    Raise an AssertionError for the first found difference between this
    and another table collection. Table indexes are not checked.

    :param other: Another table collection (TableCollection or
        ImmutableTableCollection).
    :param bool ignore_metadata: If True *all* metadata and metadata
        schemas are excluded, including the top-level tree sequence and
        constituent table metadata (default=False).
    :param bool ignore_ts_metadata: If True the top-level tree sequence
        metadata and metadata schemas are excluded. No effect if
        ``ignore_metadata`` is True.
    :param bool ignore_provenance: If True the provenance tables are not
        included in the comparison.
    :param bool ignore_timestamps: If True the provenance timestamp
        column is ignored. No effect if ``ignore_provenance`` is True.
    :param bool ignore_tables: If True no tables are included, comparing
        only the top-level information.
    :param bool ignore_reference_sequence: If True the reference sequence
        is not included in the comparison.
    """
    options = dict(
        ignore_metadata=ignore_metadata,
        ignore_ts_metadata=ignore_ts_metadata,
        ignore_provenance=ignore_provenance,
        ignore_timestamps=ignore_timestamps,
        ignore_tables=ignore_tables,
        ignore_reference_sequence=ignore_reference_sequence,
    )
    # Fast path: the low-level comparison avoids slowly walking every
    # column when the collections are in fact equal.
    if type(other) is type(self) and self.equals(other, **options):
        return

    if not isinstance(other, (TableCollection, ImmutableTableCollection)):
        raise AssertionError(f"Types differ: self={type(self)} other={type(other)}")

    _assert_table_collections_equal(self, other, **options)
3455
+
3456
def __eq__(self, other):
    # Equality delegates to equals() with its default (strict) options.
    return self.equals(other)
3458
+
3459
def __getstate__(self):
    # Pickle support: serialise as the nested-dict encoding from asdict().
    return self.asdict()
3461
+
3462
@classmethod
def load(cls, file_or_path, *, skip_tables=False, skip_reference_sequence=False):
    """
    Load a TableCollection from the specified path or file object.

    :param file_or_path: A path or an open binary file object to read
        from.
    :param bool skip_tables: Forwarded to the low-level loader (skips
        loading the table data).
    :param bool skip_reference_sequence: Forwarded to the low-level
        loader (skips loading the reference sequence).
    :return: The loaded table collection.
    """
    fileobj, opened_here = util.convert_file_like_to_open_file(file_or_path, "rb")
    lltables = _tskit.TableCollection()
    try:
        lltables.load(
            fileobj,
            skip_tables=skip_tables,
            skip_reference_sequence=skip_reference_sequence,
        )
        return TableCollection(ll_tables=lltables)
    except tskit.FileFormatError as e:
        # Translate known low-level format errors into friendlier ones.
        util.raise_known_file_format_errors(fileobj, e)
    finally:
        # Only close files we opened ourselves.
        if opened_here:
            fileobj.close()
3478
+
3479
+ def dump(self, file_or_path):
3480
+ """
3481
+ Writes the table collection to the specified path or file object.
3482
+
3483
+ :param str file_or_path: The file object or path to write the TreeSequence to.
3484
+ """
3485
+ file, local_file = util.convert_file_like_to_open_file(file_or_path, "wb")
3486
+ try:
3487
+ self._ll_tables.dump(file)
3488
+ finally:
3489
+ if local_file:
3490
+ file.close()
3491
+
3492
# Unpickle support
def __setstate__(self, state):
    # Re-initialise to obtain a fresh low-level table collection, then
    # restore its contents from the dict produced by __getstate__().
    self.__init__()
    self._ll_tables.fromdict(state)
3496
+
3497
@classmethod
def fromdict(cls, tables_dict):
    """
    Returns a table collection reconstructed from the nested-dictionary
    encoding produced by :meth:`.asdict`.

    :param dict tables_dict: The dictionary representation of a table
        collection.
    :return: A new table collection instance.
    """
    ll_tc = _tskit.TableCollection()
    ll_tc.fromdict(tables_dict)
    # The first parameter of a classmethod is the class itself; the old
    # name ``self`` was misleading. Constructing via ``cls`` (identical
    # to TableCollection here) also lets subclasses round-trip to their
    # own type.
    return cls(ll_tables=ll_tc)
3502
+
3503
def copy(self):
    """
    Returns a deep copy of this TableCollection.

    :return: A deep copy of this TableCollection.
    :rtype: tskit.TableCollection
    """
    # Round-tripping through the dict encoding yields a fully
    # independent copy.
    state = self.asdict()
    return TableCollection.fromdict(state)
3511
+
3512
def tree_sequence(self):
    """
    Returns a :class:`TreeSequence` instance built from the tables in
    this :class:`TableCollection`, first building the required indexes
    if they have not yet been created by :meth:`.build_index`. An
    exception is raised if the tables do not meet the
    :ref:`sec_valid_tree_sequence_requirements` — for example if they
    are not correctly sorted or cannot be interpreted as a tree
    sequence. In the unsorted case, :meth:`.sort` may be used to meet
    the sorting requirements.

    :return: A :class:`TreeSequence` instance reflecting the structures
        defined in this set of tables.
    :rtype: tskit.TreeSequence
    """
    # Building the index is a prerequisite for loading the tables.
    if not self.has_index():
        self.build_index()
    return tskit.TreeSequence.load_tables(self)
3529
+
3530
def simplify(
    self,
    samples=None,
    *,
    reduce_to_site_topology=False,
    filter_populations=None,
    filter_individuals=None,
    filter_sites=None,
    filter_nodes=None,
    update_sample_flags=None,
    keep_unary=False,
    keep_unary_in_individuals=None,
    keep_input_roots=False,
    record_provenance=True,
    filter_zero_mutation_sites=None,  # Deprecated alias for filter_sites
):
    """
    Simplifies the tables in place to retain only the information
    necessary to reconstruct the tree sequence describing the given
    ``samples``. If ``filter_nodes`` is True (the default) node IDs may
    change, such that node ``samples[k]`` has ID ``k`` in the result and
    only the first ``len(samples)`` nodes are marked as samples. The
    returned numpy array maps node IDs in the input tables to their
    equivalents in the output: if ``a`` is the returned array and ``u``
    an input node ID, then ``a[u]`` is the output ID, or ``tskit.NULL``
    (``-1``) if ``u`` is not mapped into the output tables.

    Tables operated on by this function must be sorted (see
    :meth:`TableCollection.sort`), children must be born strictly after
    their parents, and the intervals on which any node is a child must
    be disjoint. Other tree sequence requirements need not hold on input
    (but will on output).

    .. note::
        To invert the returned ``node_map`` (output ID -> input ID)::

            rev_map = np.zeros_like(node_map, shape=simplified_ts.num_nodes)
            kept = node_map != tskit.NULL
            rev_map[node_map[kept]] = np.arange(len(node_map))[kept]

        No element of ``rev_map`` will be ``tskit.NULL``.

    .. seealso::
        This is identical to :meth:`TreeSequence.simplify` but acts
        *in place* on this :class:`TableCollection`; see that method for
        full descriptions of the remaining parameters.

    :param list[int] samples: Node IDs to retain as samples; they need
        not be marked as samples in the input, but will constitute the
        entire sample set of the output. Defaults to all nodes marked
        with the IS_SAMPLE flag. May be an array-like (dtype=np.int32).
    :param bool reduce_to_site_topology: Whether to reduce the topology
        down to the trees that are present at sites. (Default: False)
    :param bool filter_populations: If True, remove populations not
        referenced by nodes after simplification, renumbering from zero;
        if False, leave the population table unchanged. (Default: None,
        treated as True)
    :param bool filter_individuals: If True, remove individuals not
        referenced by nodes after simplification, renumbering from zero;
        if False, leave the individual table unchanged. (Default: None,
        treated as True)
    :param bool filter_sites: If True, remove sites not referenced by
        mutations after simplification, renumbering from zero; if False,
        leave the site table unchanged. (Default: None, treated as True)
    :param bool filter_nodes: If True, remove nodes not referenced by
        edges after simplification; if False, only the node flags may
        change (when ``samples`` differs from the existing samples).
        (Default: None, treated as True)
    :param bool update_sample_flags: If True, update node flags so that
        exactly the specified ``samples`` carry the NODE_IS_SAMPLE flag
        after simplification. (Default: None, treated as True)
    :param bool keep_unary: If True, preserve unary nodes (nodes with
        exactly one child) on the path from samples to root.
        (Default: False)
    :param bool keep_unary_in_individuals: If True, preserve unary nodes
        on the path from samples to root only when they are associated
        with an individual; cannot be specified together with
        ``keep_unary``. (Default: ``None``, equivalent to False)
    :param bool keep_input_roots: Whether to retain history ancestral to
        the MRCA of the samples, so the roots of all output trees match
        the input roots. (Default: False)
    :param bool record_provenance: If True, record details of this call
        in the output provenance information. (Default: True)
    :param bool filter_zero_mutation_sites: Deprecated alias for
        ``filter_sites``.
    :return: A numpy array mapping node IDs in the input tables to their
        corresponding node IDs in the output tables.
    :rtype: numpy.ndarray (dtype=np.int32)
    """
    if filter_zero_mutation_sites is not None:
        # Deprecated in msprime 0.6.1.
        warnings.warn(
            "filter_zero_mutation_sites is deprecated; use filter_sites instead",
            FutureWarning,
            stacklevel=4,
        )
        filter_sites = filter_zero_mutation_sites
    if samples is None:
        # Default to every node carrying the sample flag.
        flags = self.nodes.flags
        samples = np.where(np.bitwise_and(flags, _tskit.NODE_IS_SAMPLE) != 0)[
            0
        ].astype(np.int32)
    else:
        samples = util.safe_np_int_cast(samples, np.int32)

    # The filter/update options default to True (and
    # keep_unary_in_individuals to False) when left as None.
    filter_populations = True if filter_populations is None else filter_populations
    filter_individuals = True if filter_individuals is None else filter_individuals
    filter_sites = True if filter_sites is None else filter_sites
    filter_nodes = True if filter_nodes is None else filter_nodes
    update_sample_flags = (
        True if update_sample_flags is None else update_sample_flags
    )
    keep_unary_in_individuals = (
        False if keep_unary_in_individuals is None else keep_unary_in_individuals
    )

    node_map = self._ll_tables.simplify(
        samples,
        filter_sites=filter_sites,
        filter_individuals=filter_individuals,
        filter_populations=filter_populations,
        filter_nodes=filter_nodes,
        update_sample_flags=update_sample_flags,
        reduce_to_site_topology=reduce_to_site_topology,
        keep_unary=keep_unary,
        keep_unary_in_individuals=keep_unary_in_individuals,
        keep_input_roots=keep_input_roots,
    )
    if record_provenance:
        # TODO replace with a version of https://github.com/tskit-dev/tskit/pull/243
        # TODO also make sure we convert all the arguments so that they are
        # definitely JSON encodable.
        parameters = {"command": "simplify", "TODO": "add simplify parameters"}
        self.provenances.add_row(
            record=json.dumps(provenance.get_provenance_dict(parameters))
        )
    return node_map
3684
+
3685
def link_ancestors(self, samples, ancestors):
    """
    Returns an :class:`EdgeTable` instance describing a subset of the
    genealogical relationships between the nodes in ``samples`` and
    ``ancestors``.

    Each row ``parent, child, left, right`` in the output indicates that
    ``child`` has inherited the segment ``[left, right)`` from ``parent``
    more recently than from any other node in these lists. Relationships
    between two nodes of ``samples`` are always reported, as are
    ``ancestor->sample`` relationships; a relationship whose child is in
    ``ancestors`` is reported only if that ancestor has a *relevant
    descendant*, i.e. it also appears somewhere in the ``parent`` column
    of the output. Thus information about the ancestors of a node in
    ``ancestors`` is retained only when it has a relevant descendant,
    while information about the ancestors of a node in ``samples`` is
    always retained.

    The supplied node IDs must be non-empty lists of node IDs in the
    tree sequence; they do not have to be *samples* of it. The two lists
    may overlap (adding a node from ``samples`` to ``ancestors`` does
    not change the output), so setting both to the same list finds all
    genealogical relationships within that list. If no node in
    ``ancestors`` or ``samples`` is ancestral to ``samples`` anywhere in
    the tree sequence, an empty table is returned. Node IDs in
    ``parent`` and ``child`` refer to the node table of the input tree
    sequence.

    :param list[int] samples: A list of node IDs to retain as samples.
    :param list[int] ancestors: A list of node IDs to use as ancestors.
    :return: An :class:`EdgeTable` instance displaying relationships
        between the `samples` and `ancestors`.
    """
    sample_ids = util.safe_np_int_cast(samples, np.int32)
    ancestor_ids = util.safe_np_int_cast(ancestors, np.int32)
    edge_table = self._ll_tables.link_ancestors(sample_ids, ancestor_ids)
    return EdgeTable(ll_table=edge_table)
3742
+
3743
def map_ancestors(self, *args, **kwargs):
    """
    Deprecated alias for :meth:`.link_ancestors`.
    """
    # A deprecated alias for link_ancestors()
    return self.link_ancestors(*args, **kwargs)
3746
+
3747
def sort(self, edge_start=0, *, site_start=0, mutation_start=0):
    """
    Sorts the tables in place, ensuring that all tree sequence ordering
    requirements listed in :ref:`sec_valid_tree_sequence_requirements`
    are met, as long as each site has at most one mutation (see below).
    The node, individual, population and provenance tables are not
    affected.

    Edges are sorted by time of parent, then parent node ID, then child
    node ID, then left endpoint; this exceeds the
    :ref:`edge sorting requirements <sec_edge_requirements>` for a valid
    tree sequence, which only require that all edges for a given parent
    are adjacent. Sites are sorted by position, with equal positions
    retaining their relative order. Mutations are sorted by site, then
    time (if known), then the mutation's node's time, then number of
    descendant mutations (so parent mutations precede children), then
    node, then original order. Migrations are sorted by ``time``,
    ``source``, ``dest``, ``left`` and ``node`` — a total order, which
    exceeds the :ref:`migration sorting requirements
    <sec_migration_requirements>` (time order only).

    ``edge_start`` allows efficient sorting when the user knows that the
    edges up to a given index are already sorted; rows before that index
    are not affected. If both ``site_start`` and ``mutation_start``
    equal the number of rows in their respective tables then neither is
    sorted; a partial skip is not possible — both or neither must be
    skipped.

    :param int edge_start: The index in the edge table where sorting
        starts (default=0; must be <= len(edges)).
    :param int site_start: The index in the site table where sorting
        starts (default=0; must be one of [0, len(sites)]).
    :param int mutation_start: The index in the mutation table where
        sorting starts (default=0; must be one of [0, len(mutations)]).
    """
    self._ll_tables.sort(edge_start, site_start, mutation_start)
    # TODO add provenance
3805
+
3806
def sort_individuals(self):
    """
    Sorts the individual table in place so that parents come before
    children, remapping the ``parent`` column and updating node
    references to individuals accordingly. This is a stricter order
    than a valid tree sequence requires.
    """
    self._ll_tables.sort_individuals()
    # TODO add provenance
3815
+
3816
def canonicalise(self, remove_unreferenced=None):
    """
    Puts the tables in *canonical* form, a stricter order than
    :ref:`required <sec_valid_tree_sequence_requirements>` for a valid
    tree sequence. The population table is sorted so populations with
    the lowest node IDs come first; the individual table is sorted
    firstly as in :meth:`.sort_individuals` and secondarily by the
    lowest ID of the nodes referring to each individual (see
    :meth:`TreeSequence.subset`). The remaining tables are sorted as in
    :meth:`.sort`, with mutations ordered by site, then time (if
    known), then the mutation's node's time, then number of descendant
    mutations (parents before children), then node, then original
    order. Any two table collections holding the same information with
    the same node order should therefore be identical after canonical
    sorting (no canonical order exists for the node table).

    By default, sites, individuals and populations that are not
    referenced (by mutations and nodes, respectively) are removed; pass
    ``remove_unreferenced=False`` to keep them, noting that
    unreferenced individuals and populations are then placed at the end
    of their tables in the original order.

    .. seealso::

        :meth:`.sort` for sorting edges, mutations, and sites, and
        :meth:`.subset` for reordering nodes, individuals, and
        populations.

    :param bool remove_unreferenced: Whether to remove unreferenced
        sites, individuals, and populations (default=True).
    """
    if remove_unreferenced is None:
        remove_unreferenced = True
    self._ll_tables.canonicalise(remove_unreferenced=remove_unreferenced)
    # TODO add provenance
3852
+
3853
def compute_mutation_parents(self):
    """
    Modifies the tables in place, computing the ``parent`` column of the
    mutation table. The node and edge tables must be valid, and the site
    and mutation tables must be sorted (see
    :meth:`TableCollection.sort`). An error is produced if mutations are
    not sorted (i.e. a mutation appears before its mutation parent),
    *unless* the two mutations occur on the same branch and have unknown
    times, in which case the error cannot be detected.

    The ``parent`` of a given mutation is the ID of the next mutation
    encountered traversing the tree upwards from that mutation, or
    ``NULL`` if there is no such mutation.
    """
    self._ll_tables.compute_mutation_parents()
    # TODO add provenance
3869
+
3870
def compute_mutation_times(self):
    """
    Modifies the tables in place, computing valid values for the
    ``time`` column of the mutation table. The node and edge tables
    must be valid, and the site and mutation tables must be sorted and
    indexed (see :meth:`TableCollection.sort` and
    :meth:`TableCollection.build_index`).

    A single mutation on an edge is assigned the mid-point of the times
    of the nodes above and below it; multiple mutations on an edge at a
    site are spread evenly along the edge. Mutations above a root node
    are assigned the root node's time. The mutation table is re-sorted
    if the new times invalidate the original order.
    """
    self._ll_tables.compute_mutation_times()
    # TODO add provenance
3889
+
3890
def deduplicate_sites(self):
    """
    Modifies the tables in place, removing rows of the site table with a
    duplicate ``position`` (keeping only the *first* row for each
    position) and renumbering the ``site`` column of the mutation table
    to match. The site table must be sorted by position.

    .. warning:: This method does not sort the tables afterwards, so
        mutations may no longer be sorted by time.
    """
    self._ll_tables.deduplicate_sites()
    # TODO add provenance
3902
+
3903
def delete_sites(self, site_ids, record_provenance=True):
    """
    Remove the specified sites entirely from the sites and mutations tables in this
    collection. This is identical to :meth:`TreeSequence.delete_sites` but acts
    *in place* to alter the data in this :class:`TableCollection`.

    :param list[int] site_ids: A list of site IDs specifying the sites to remove.
    :param bool record_provenance: If ``True``, add details of this operation
        to the provenance table in this TableCollection. (Default: ``True``).
    :raises ValueError: If any site ID is negative or >= the number of sites.
    """
    # Boolean mask over the site table: True means the row is kept.
    keep_sites = np.ones(len(self.sites), dtype=bool)
    site_ids = util.safe_np_int_cast(site_ids, np.int32)
    if np.any(site_ids < 0) or np.any(site_ids >= len(self.sites)):
        raise ValueError("Site ID out of bounds")
    keep_sites[site_ids] = 0
    # Ragged columns (state/metadata) must be filtered together with
    # their offset arrays.
    new_as, new_as_offset = keep_with_offset(
        keep_sites, self.sites.ancestral_state, self.sites.ancestral_state_offset
    )
    new_md, new_md_offset = keep_with_offset(
        keep_sites, self.sites.metadata, self.sites.metadata_offset
    )
    self.sites.set_columns(
        position=self.sites.position[keep_sites],
        ancestral_state=new_as,
        ancestral_state_offset=new_as_offset,
        metadata=new_md,
        metadata_offset=new_md_offset,
    )
    # We also need to adjust the mutations table, as it references into sites
    keep_mutations = keep_sites[self.mutations.site]
    new_ds, new_ds_offset = keep_with_offset(
        keep_mutations,
        self.mutations.derived_state,
        self.mutations.derived_state_offset,
    )
    new_md, new_md_offset = keep_with_offset(
        keep_mutations, self.mutations.metadata, self.mutations.metadata_offset
    )
    # Site numbers will have changed: cumsum-of-kept minus one maps each
    # old site ID to its new (renumbered) ID.
    site_map = np.cumsum(keep_sites, dtype=self.mutations.site.dtype) - 1
    # Mutation numbers will change, so the parent references need altering
    mutation_map = np.cumsum(keep_mutations, dtype=self.mutations.parent.dtype) - 1
    # Map parent == -1 to -1, and check this has worked (assumes tskit.NULL == -1)
    mutation_map = np.append(mutation_map, -1).astype(self.mutations.parent.dtype)
    assert mutation_map[tskit.NULL] == tskit.NULL
    self.mutations.set_columns(
        site=site_map[self.mutations.site[keep_mutations]],
        node=self.mutations.node[keep_mutations],
        time=self.mutations.time[keep_mutations],
        derived_state=new_ds,
        derived_state_offset=new_ds_offset,
        parent=mutation_map[self.mutations.parent[keep_mutations]],
        metadata=new_md,
        metadata_offset=new_md_offset,
    )
    if record_provenance:
        # TODO replace with a version of https://github.com/tskit-dev/tskit/pull/243
        parameters = {"command": "delete_sites", "TODO": "add parameters"}
        self.provenances.add_row(
            record=json.dumps(provenance.get_provenance_dict(parameters))
        )
3964
+
3965
def delete_intervals(self, intervals, simplify=True, record_provenance=True):
    """
    Delete all information from this set of tables which lies *within*
    the specified list of genomic intervals. This is identical to
    :meth:`TreeSequence.delete_intervals` but acts *in place* to alter
    the data in this :class:`TableCollection`.

    :param array_like intervals: A list of (start, end) pairs describing
        the genomic intervals to delete. Intervals must be
        non-overlapping and in increasing order, and interpretable as a
        2D numpy array with shape (N, 2).
    :param bool simplify: If True, run simplify on the tables so that
        nodes no longer used are discarded. (Default: True)
    :param bool record_provenance: If ``True``, add details of this
        operation to the provenance table in this TableCollection.
        (Default: ``True``)
    """
    # Deleting a set of intervals is keeping its complement over
    # [0, sequence_length). Provenance is recorded here rather than in
    # the delegated call, so the record names this operation.
    complement = util.negate_intervals(intervals, 0, self.sequence_length)
    self.keep_intervals(complement, simplify=simplify, record_provenance=False)
    if record_provenance:
        parameters = {"command": "delete_intervals", "TODO": "add parameters"}
        self.provenances.add_row(
            record=json.dumps(provenance.get_provenance_dict(parameters))
        )
3991
+
3992
    def keep_intervals(self, intervals, simplify=True, record_provenance=True):
        """
        Delete all information from this set of tables which lies *outside* the
        specified list of genomic intervals. This is identical to
        :meth:`TreeSequence.keep_intervals` but acts *in place* to alter
        the data in this :class:`TableCollection`.

        :param array_like intervals: A list (start, end) pairs describing the
            genomic intervals to keep. Intervals must be non-overlapping and
            in increasing order. The list of intervals must be interpretable as a
            2D numpy array with shape (N, 2), where N is the number of intervals.
        :param bool simplify: If True, run simplify on the tables so that nodes
            no longer used are discarded. Must be ``False`` if input tree sequence
            includes migrations. (Default: True).
        :param bool record_provenance: If ``True``, add details of this operation
            to the provenance table in this TableCollection. (Default: ``True``).
        """
        # Validates, converts to an (N, 2) float array and checks bounds/order.
        intervals = util.intervals_to_np_array(intervals, 0, self.sequence_length)

        # Snapshot edges and migrations, then rebuild both tables interval by
        # interval: rows overlapping a kept interval are appended back, clipped
        # to the interval boundaries.
        edges = self.edges.copy()
        self.edges.clear()
        migrations = self.migrations.copy()
        self.migrations.clear()
        # Boolean mask over sites, accumulated across all kept intervals.
        keep_sites = np.repeat(False, self.sites.num_rows)
        for s, e in intervals:
            curr_keep_sites = np.logical_and(
                self.sites.position >= s, self.sites.position < e
            )
            keep_sites = np.logical_or(keep_sites, curr_keep_sites)
            # An edge is kept if it overlaps (s, e) at all; half-open logic:
            # edges ending at s or starting at e do not overlap.
            keep_edges = np.logical_not(
                np.logical_or(edges.right <= s, edges.left >= e)
            )
            # keep_with_offset subsets a ragged (data, offset) column pair.
            metadata, metadata_offset = keep_with_offset(
                keep_edges, edges.metadata, edges.metadata_offset
            )
            self.edges.append_columns(
                # Clip kept edges to the interval boundaries.
                left=np.fmax(s, edges.left[keep_edges]),
                right=np.fmin(e, edges.right[keep_edges]),
                parent=edges.parent[keep_edges],
                child=edges.child[keep_edges],
                metadata=metadata,
                metadata_offset=metadata_offset,
            )
            # Same overlap/clip treatment for migrations.
            keep_migrations = np.logical_not(
                np.logical_or(migrations.right <= s, migrations.left >= e)
            )
            metadata, metadata_offset = keep_with_offset(
                keep_migrations, migrations.metadata, migrations.metadata_offset
            )
            self.migrations.append_columns(
                left=np.fmax(s, migrations.left[keep_migrations]),
                right=np.fmin(e, migrations.right[keep_migrations]),
                node=migrations.node[keep_migrations],
                source=migrations.source[keep_migrations],
                dest=migrations.dest[keep_migrations],
                time=migrations.time[keep_migrations],
                metadata=metadata,
                metadata_offset=metadata_offset,
            )
        # Remove sites (and their mutations) outside every kept interval.
        self.delete_sites(
            np.where(np.logical_not(keep_sites))[0], record_provenance=False
        )

        # Rebuilding per-interval can break the required edge sort order.
        self.sort()
        if simplify:
            self.simplify(record_provenance=False)
        if record_provenance:
            parameters = {"command": "keep_intervals", "TODO": "add parameters"}
            self.provenances.add_row(
                record=json.dumps(provenance.get_provenance_dict(parameters))
            )
4063
+
4064
+ def _check_trim_conditions(self):
4065
+ if self.has_reference_sequence():
4066
+ raise ValueError(
4067
+ "Cannot trim if there is a reference sequence. Please remove the "
4068
+ "reference sequence by calling `.reference_sequence.clear()` first."
4069
+ )
4070
+ if self.migrations.num_rows > 0:
4071
+ if (np.min(self.migrations.left) < np.min(self.edges.left)) and (
4072
+ np.max(self.migrations.right) > np.max(self.edges.right)
4073
+ ):
4074
+ raise ValueError(
4075
+ "Cannot trim a tree sequence with migrations which exist to the"
4076
+ "left of the leftmost edge or to the right of the rightmost edge."
4077
+ )
4078
+ if self.edges.num_rows == 0:
4079
+ raise ValueError(
4080
+ "Trimming a tree sequence with no edges would reduce the sequence length"
4081
+ " to zero, which is not allowed"
4082
+ )
4083
+
4084
+ def ltrim(self, record_provenance=True):
4085
+ """
4086
+ Reset the coordinate system used in these tables, changing the left and right
4087
+ genomic positions in the edge table such that the leftmost edge now starts at
4088
+ position 0. This is identical to :meth:`TreeSequence.ltrim` but acts *in place*
4089
+ to alter the data in this :class:`TableCollection`.
4090
+
4091
+ :param bool record_provenance: If ``True``, add details of this operation
4092
+ to the provenance table in this TableCollection. (Default: ``True``).
4093
+ """
4094
+ self._check_trim_conditions()
4095
+ leftmost = np.min(self.edges.left)
4096
+ self.delete_sites(
4097
+ np.where(self.sites.position < leftmost), record_provenance=False
4098
+ )
4099
+ self.edges.set_columns(
4100
+ left=self.edges.left - leftmost,
4101
+ right=self.edges.right - leftmost,
4102
+ parent=self.edges.parent,
4103
+ child=self.edges.child,
4104
+ )
4105
+ self.sites.set_columns(
4106
+ position=self.sites.position - leftmost,
4107
+ ancestral_state=self.sites.ancestral_state,
4108
+ ancestral_state_offset=self.sites.ancestral_state_offset,
4109
+ metadata=self.sites.metadata,
4110
+ metadata_offset=self.sites.metadata_offset,
4111
+ )
4112
+ self.migrations.set_columns(
4113
+ left=self.migrations.left - leftmost,
4114
+ right=self.migrations.right - leftmost,
4115
+ time=self.migrations.time,
4116
+ node=self.migrations.node,
4117
+ source=self.migrations.source,
4118
+ dest=self.migrations.dest,
4119
+ )
4120
+ self.sequence_length = self.sequence_length - leftmost
4121
+ if record_provenance:
4122
+ # TODO replace with a version of https://github.com/tskit-dev/tskit/pull/243
4123
+ parameters = {
4124
+ "command": "ltrim",
4125
+ }
4126
+ self.provenances.add_row(
4127
+ record=json.dumps(provenance.get_provenance_dict(parameters))
4128
+ )
4129
+
4130
+ def rtrim(self, record_provenance=True):
4131
+ """
4132
+ Reset the ``sequence_length`` property so that the sequence ends at the end of
4133
+ the last edge. This is identical to :meth:`TreeSequence.rtrim` but acts
4134
+ *in place* to alter the data in this :class:`TableCollection`.
4135
+
4136
+ :param bool record_provenance: If ``True``, add details of this operation
4137
+ to the provenance table in this TableCollection. (Default: ``True``).
4138
+ """
4139
+ self._check_trim_conditions()
4140
+ rightmost = np.max(self.edges.right)
4141
+ self.delete_sites(
4142
+ np.where(self.sites.position >= rightmost), record_provenance=False
4143
+ )
4144
+ self.sequence_length = rightmost
4145
+ if record_provenance:
4146
+ # TODO replace with a version of https://github.com/tskit-dev/tskit/pull/243
4147
+ parameters = {
4148
+ "command": "rtrim",
4149
+ }
4150
+ self.provenances.add_row(
4151
+ record=json.dumps(provenance.get_provenance_dict(parameters))
4152
+ )
4153
+
4154
+ def trim(self, record_provenance=True):
4155
+ """
4156
+ Trim away any empty regions on the right and left of the tree sequence encoded by
4157
+ these tables. This is identical to :meth:`TreeSequence.trim` but acts *in place*
4158
+ to alter the data in this :class:`TableCollection`.
4159
+
4160
+ :param bool record_provenance: If ``True``, add details of this operation
4161
+ to the provenance table in this TableCollection. (Default: ``True``).
4162
+ """
4163
+ self.rtrim(record_provenance=False)
4164
+ self.ltrim(record_provenance=False)
4165
+ if record_provenance:
4166
+ # TODO replace with a version of https://github.com/tskit-dev/tskit/pull/243
4167
+ parameters = {
4168
+ "command": "trim",
4169
+ }
4170
+ self.provenances.add_row(
4171
+ record=json.dumps(provenance.get_provenance_dict(parameters))
4172
+ )
4173
+
4174
    def shift(self, value, *, sequence_length=None, record_provenance=True):
        """
        Shift the coordinate system (used by edges, sites, and migrations) of this
        TableCollection by a given value. This is identical to :meth:`TreeSequence.shift`
        but acts *in place* to alter the data in this :class:`TableCollection`.

        .. note::
            No attempt is made to check that the new coordinate system or sequence length
            is valid: if you wish to do this, use :meth:`TreeSequence.shift` instead.

        :param value: The amount by which to shift the coordinate system.
        :param sequence_length: The new sequence length of the tree sequence. If
            ``None`` (default) add `value` to the sequence length.
        :param bool record_provenance: If ``True``, add details of this operation
            to the provenance table in this TableCollection. (Default: ``True``).
        """
        if self.has_reference_sequence():
            raise ValueError(
                "Cannot shift if there is a reference sequence. Please remove the "
                "reference sequence by calling `.reference_sequence.clear()` first."
            )
        # The edge index refers to the old coordinates, so it must be dropped
        # before the columns are modified.
        self.drop_index()
        self.edges.left += value
        self.edges.right += value
        self.migrations.left += value
        self.migrations.right += value
        self.sites.position += value
        if sequence_length is None:
            self.sequence_length += value
        else:
            self.sequence_length = sequence_length
        if record_provenance:
            parameters = {
                "command": "shift",
                "value": value,
                "sequence_length": sequence_length,
            }
            self.provenances.add_row(
                record=json.dumps(provenance.get_provenance_dict(parameters))
            )
4212
+
4213
    def delete_older(self, time):
        """
        Deletes edge, mutation and migration information at least as old as
        the specified time.

        .. seealso:: This method is similar to the higher-level
            :meth:`TreeSequence.decapitate` method, which also splits
            edges that intersect with the given time.
            :meth:`TreeSequence.decapitate`
            is more useful for most purposes, and may be what
            you need instead of this method!

        For the purposes of this method, an edge covers the times from the
        child node up until the *parent* node, so that any edge with parent
        node time > ``time`` will be removed.

        Any mutation whose time is >= ``time`` will be removed. A mutation's time
        is its associated ``time`` value, or the time of its node if the
        mutation's time was marked as unknown (:data:`UNKNOWN_TIME`).

        Any migration with time >= ``time`` will be removed.

        The node table is not affected by this operation.

        .. note:: This method does not have any specific sorting requirements
            and will maintain mutation parent mappings.

        :param float time: The cutoff time.
        """
        # All the work happens in the C library.
        self._ll_tables.delete_older(time)
4243
+
4244
+ def clear(
4245
+ self,
4246
+ clear_provenance=False,
4247
+ clear_metadata_schemas=False,
4248
+ clear_ts_metadata_and_schema=False,
4249
+ ):
4250
+ """
4251
+ Remove all rows of the data tables, optionally remove provenance, metadata
4252
+ schemas and ts-level metadata.
4253
+
4254
+ :param bool clear_provenance: If ``True``, remove all rows of the provenance
4255
+ table. (Default: ``False``).
4256
+ :param bool clear_metadata_schemas: If ``True``, clear the table metadata
4257
+ schemas. (Default: ``False``).
4258
+ :param bool clear_ts_metadata_and_schema: If ``True``, clear the tree-sequence
4259
+ level metadata and schema (Default: ``False``).
4260
+ """
4261
+ self._ll_tables.clear(
4262
+ clear_provenance=clear_provenance,
4263
+ clear_metadata_schemas=clear_metadata_schemas,
4264
+ clear_ts_metadata_and_schema=clear_ts_metadata_and_schema,
4265
+ )
4266
+
4267
    def has_index(self):
        """
        Returns True if this TableCollection is indexed. See :ref:`sec_table_indexes`
        for information on indexes.
        """
        # The C call returns an int flag; normalise to a Python bool.
        return bool(self._ll_tables.has_index())

    def build_index(self):
        """
        Builds an index on this TableCollection. Any existing indexes are automatically
        dropped. See :ref:`sec_table_indexes` for information on indexes.
        """
        self._ll_tables.build_index()

    def drop_index(self):
        """
        Drops any indexes present on this table collection. If the tables are not
        currently indexed this method has no effect. See :ref:`sec_table_indexes`
        for information on indexes.
        """
        self._ll_tables.drop_index()
4288
+
4289
+ def subset(
4290
+ self,
4291
+ nodes,
4292
+ record_provenance=True,
4293
+ *,
4294
+ reorder_populations=None,
4295
+ remove_unreferenced=None,
4296
+ ):
4297
+ """
4298
+ Modifies the tables in place to contain only the entries referring to
4299
+ the provided list of node IDs, with nodes reordered according to the
4300
+ order they appear in the list. Other tables are :meth:`sorted <sort>`
4301
+ to conform to the :ref:`sec_valid_tree_sequence_requirements`, and
4302
+ additionally sorted as described in the documentation for the equivalent
4303
+ tree sequence method :meth:`TreeSequence.subset`: please see this for more
4304
+ detail.
4305
+
4306
+ :param list nodes: The list of nodes for which to retain information. This
4307
+ may be a numpy array (or array-like) object (dtype=np.int32).
4308
+ :param bool record_provenance: Whether to record a provenance entry
4309
+ in the provenance table for this operation.
4310
+ :param bool reorder_populations: Whether to reorder the population table
4311
+ (default: True). If False, the population table will not be altered
4312
+ in any way.
4313
+ :param bool remove_unreferenced: Whether sites, individuals, and populations
4314
+ that are not referred to by any retained entries in the tables should
4315
+ be removed (default: True). See the description for details.
4316
+ """
4317
+ reorder_populations = (
4318
+ True if reorder_populations is None else reorder_populations
4319
+ )
4320
+ remove_unreferenced = (
4321
+ True if remove_unreferenced is None else remove_unreferenced
4322
+ )
4323
+ nodes = util.safe_np_int_cast(nodes, np.int32)
4324
+ self._ll_tables.subset(
4325
+ nodes,
4326
+ reorder_populations=reorder_populations,
4327
+ remove_unreferenced=remove_unreferenced,
4328
+ )
4329
+ self.sort()
4330
+ if record_provenance:
4331
+ parameters = {"command": "subset", "nodes": nodes.tolist()}
4332
+ self.provenances.add_row(
4333
+ record=json.dumps(provenance.get_provenance_dict(parameters))
4334
+ )
4335
+
4336
+ def union(
4337
+ self,
4338
+ other,
4339
+ node_mapping,
4340
+ check_shared_equality=True,
4341
+ add_populations=True,
4342
+ record_provenance=True,
4343
+ *,
4344
+ all_edges=False,
4345
+ all_mutations=False,
4346
+ ):
4347
+ """
4348
+ Modifies the table collection in place by adding the non-shared
4349
+ portions of ``other`` to itself. To perform the node-wise union,
4350
+ the method relies on a ``node_mapping`` array, that maps nodes in
4351
+ ``other`` to its equivalent node in ``self`` or ``tskit.NULL`` if
4352
+ the node is exclusive to ``other``. See :meth:`TreeSequence.union` for a more
4353
+ detailed description.
4354
+
4355
+ :param TableCollection other: Another table collection.
4356
+ :param list node_mapping: An array of node IDs that relate nodes in
4357
+ ``other`` to nodes in ``self``: the k-th element of ``node_mapping``
4358
+ should be the index of the equivalent node in ``self``, or
4359
+ ``tskit.NULL`` if the node is not present in ``self`` (in which case it
4360
+ will be added to self).
4361
+ :param bool check_shared_equality: If True, the shared portions of the
4362
+ table collections will be checked for equality.
4363
+ :param bool add_populations: If True, nodes new to ``self`` will be
4364
+ assigned new population IDs.
4365
+ :param bool record_provenance: Whether to record a provenance entry
4366
+ in the provenance table for this operation.
4367
+ :param bool all_edges: If True, then all edges in ``other`` are added
4368
+ to ``self``.
4369
+ :param bool all_mutations: If True, then all mutations in ``other`` are added
4370
+ to ``self``.
4371
+ """
4372
+ node_mapping = util.safe_np_int_cast(node_mapping, np.int32)
4373
+ self._ll_tables.union(
4374
+ other._ll_tables,
4375
+ node_mapping,
4376
+ check_shared_equality=check_shared_equality,
4377
+ add_populations=add_populations,
4378
+ all_edges=all_edges,
4379
+ all_mutations=all_mutations,
4380
+ )
4381
+ if record_provenance:
4382
+ other_records = [prov.record for prov in other.provenances]
4383
+ other_timestamps = [prov.timestamp for prov in other.provenances]
4384
+ parameters = {
4385
+ "command": "union",
4386
+ "other": {"timestamp": other_timestamps, "record": other_records},
4387
+ "node_mapping": node_mapping.tolist(),
4388
+ }
4389
+ self.provenances.add_row(
4390
+ record=json.dumps(provenance.get_provenance_dict(parameters))
4391
+ )
4392
+
4393
    def ibd_segments(
        self,
        *,
        within=None,
        between=None,
        max_time=None,
        min_span=None,
        store_pairs=None,
        store_segments=None,
    ):
        """
        Equivalent to the :meth:`TreeSequence.ibd_segments` method; please see its
        documentation for more details, and use this method only if you specifically need
        to work with a :class:`TableCollection` object.

        This method has the same data requirements as
        :meth:`TableCollection.simplify`. In particular, the tables in the collection
        have :ref:`required <sec_valid_tree_sequence_requirements>` sorting orders.
        To enforce this, you can call :meth:`TableCollection.sort` before using this
        method. If the edge table contains any edges with identical
        parents and children over adjacent genomic intervals, any IBD intervals
        underneath the edges will also be split across the breakpoint(s). To prevent this
        behaviour in this situation, use :meth:`EdgeTable.squash` beforehand.

        :param list within: As for the :meth:`TreeSequence.ibd_segments` method.
        :param list[list] between: As for the :meth:`TreeSequence.ibd_segments` method.
        :param float max_time: As for the :meth:`TreeSequence.ibd_segments` method.
        :param float min_span: As for the :meth:`TreeSequence.ibd_segments` method.
        :param bool store_pairs: As for the :meth:`TreeSequence.ibd_segments` method.
        :param bool store_segments: As for the :meth:`TreeSequence.ibd_segments` method.
        :return: An :class:`.IdentitySegments` object containing the recorded
            IBD information.
        :rtype: IdentitySegments
        """
        # Resolve the None defaults to their concrete values before passing
        # to the C layer and storing them on the result object.
        max_time = np.inf if max_time is None else max_time
        min_span = 0 if min_span is None else min_span
        store_pairs = False if store_pairs is None else store_pairs
        store_segments = False if store_segments is None else store_segments
        if within is not None and between is not None:
            raise ValueError(
                "The ``within`` and ``between`` arguments are mutually exclusive"
            )
        if between is not None:
            sample_set_sizes = np.array(
                [len(sample_set) for sample_set in between], dtype=np.uint64
            )
            # hstack has some annoying quirks around its handling of empty
            # lists which we need to work around. In a way it would be more
            # convenient to detect these conditions as errors, but then we
            # end up having to workaround edge cases in the tests and its
            # mathematically neater this way.
            pre_flattened = [lst for lst in between if len(lst) > 0]
            if len(pre_flattened) == 0:
                flattened = []
            else:
                flattened = util.safe_np_int_cast(np.hstack(pre_flattened), np.int32)
            ll_result = self._ll_tables.ibd_segments_between(
                sample_set_sizes=sample_set_sizes,
                sample_sets=flattened,
                max_time=max_time,
                min_span=min_span,
                store_pairs=store_pairs,
                store_segments=store_segments,
            )
        else:
            # ``within=None`` is passed through: the C layer interprets it
            # as "all samples".
            if within is not None:
                within = util.safe_np_int_cast(within, np.int32)
            ll_result = self._ll_tables.ibd_segments_within(
                samples=within,
                max_time=max_time,
                min_span=min_span,
                store_pairs=store_pairs,
                store_segments=store_segments,
            )
        # Wrap the low-level result, remembering the options used so the
        # wrapper can report them.
        return IdentitySegments(
            ll_result,
            max_time=max_time,
            min_span=min_span,
            store_pairs=store_pairs,
            store_segments=store_segments,
        )
4474
+
4475
+
4476
class ImmutableNodeTable(ImmutableMetadataTable):
    """
    Read-only view of a node table, backed by a low-level tree sequence.
    Behaviour is inherited from ImmutableMetadataTable; this class only
    declares the table's name, mutable counterpart and column layout.
    """

    table_name = "nodes"
    mutable_class = NodeTable

    column_names = [
        "time",
        "flags",
        "population",
        "individual",
        "metadata",
        "metadata_offset",
    ]
4488
+
4489
+
4490
class ImmutableIndividualTable(ImmutableMetadataTable):
    """
    Read-only view of an individual table, backed by a low-level tree
    sequence. Only the table's name, mutable counterpart and column layout
    are declared here; behaviour comes from ImmutableMetadataTable.
    """

    table_name = "individuals"
    mutable_class = IndividualTable

    # NOTE(review): indices into the row fields; their exact meaning is
    # defined by ImmutableMetadataTable (not visible in this file section).
    _row_field_indices = (0, 1, 2, 3)

    column_names = [
        "flags",
        "location",
        "location_offset",
        "parents",
        "parents_offset",
        "metadata",
        "metadata_offset",
    ]
4505
+
4506
+
4507
class ImmutableEdgeTable(ImmutableMetadataTable):
    """
    Read-only view of an edge table, backed by a low-level tree sequence.
    Behaviour is inherited from ImmutableMetadataTable.
    """

    table_name = "edges"
    mutable_class = EdgeTable

    column_names = [
        "left",
        "right",
        "parent",
        "child",
        "metadata",
        "metadata_offset",
    ]
4519
+
4520
+
4521
class ImmutableMigrationTable(ImmutableMetadataTable):
    """
    Read-only view of a migration table, backed by a low-level tree sequence.
    Behaviour is inherited from ImmutableMetadataTable.
    """

    table_name = "migrations"
    mutable_class = MigrationTable

    column_names = [
        "left",
        "right",
        "node",
        "source",
        "dest",
        "time",
        "metadata",
        "metadata_offset",
    ]
4535
+
4536
+
4537
class ImmutableSiteTable(ImmutableMetadataTable):
    """
    Read-only view of a site table, backed by a low-level tree sequence.
    Behaviour is inherited from ImmutableMetadataTable.
    """

    table_name = "sites"
    mutable_class = SiteTable

    # NOTE(review): indices into the row fields; their exact meaning is
    # defined by ImmutableMetadataTable (not visible in this file section).
    _row_field_indices = (0, 1, 4)

    column_names = [
        "position",
        "ancestral_state",
        "ancestral_state_offset",
        "metadata",
        "metadata_offset",
    ]
4550
+
4551
+
4552
class ImmutableMutationTable(ImmutableMetadataTable):
    """
    Read-only view of a mutation table, backed by a low-level tree sequence.
    Behaviour is inherited from ImmutableMetadataTable.
    """

    table_name = "mutations"
    mutable_class = MutationTable

    # NOTE(review): indices into the row fields; their exact meaning is
    # defined by ImmutableMetadataTable (not visible in this file section).
    _row_field_indices = (0, 1, 2, 3, 4, 5)

    column_names = [
        "site",
        "node",
        "time",
        "derived_state",
        "derived_state_offset",
        "parent",
        "metadata",
        "metadata_offset",
    ]
4568
+
4569
+
4570
class ImmutablePopulationTable(ImmutableMetadataTable):
    """
    Read-only view of a population table, backed by a low-level tree sequence.
    Behaviour is inherited from ImmutableMetadataTable.
    """

    table_name = "populations"
    mutable_class = PopulationTable

    column_names = ["metadata", "metadata_offset"]
4575
+
4576
+
4577
class ImmutableProvenanceTable(ImmutableBaseTable):
    """
    Read-only view of a provenance table, backed by a low-level tree sequence.
    Unlike the other immutable tables this derives from ImmutableBaseTable
    (there is no metadata column) and supports comparing while ignoring the
    timestamp column.
    """

    table_name = "provenances"
    mutable_class = ProvenanceTable

    column_names = [
        "record",
        "record_offset",
        "timestamp",
        "timestamp_offset",
    ]

    def equals(self, other, ignore_timestamps=False):
        """
        Returns True if `self` and `other` contain identical data, optionally
        ignoring the timestamp column.
        """
        return self._equals_internal(other, ignore_timestamps=bool(ignore_timestamps))

    def assert_equals(self, other, *, ignore_timestamps=False):
        """
        Raise an AssertionError for inequality between `self` and `other`,
        optionally ignoring the timestamp column.
        """
        # table_name is "provenances" for this class, so this guard does not
        # fire here; presumably it protects subclasses or shared callers.
        if ignore_timestamps and getattr(self, "table_name", None) != "provenances":
            raise ValueError("ignore_timestamps is only valid for Provenance tables")
        self._assert_equals_internal(other, ignore_timestamps=bool(ignore_timestamps))
4595
+
4596
+
4597
+ class ImmutableTableCollection(metadata.MetadataProvider):
4598
+ """
4599
+ An immutable view of a table collection backed by a :class:`TreeSequence`.
4600
+ Provides zero-copy read access to all table data without allowing mutation.
4601
+
4602
+ This class is returned by :attr:`TreeSequence.tables` and provides efficient,
4603
+ read-only access to the underlying table data. Since it's backed directly by
4604
+ the low-level TreeSequence representation, no copying of data is required.
4605
+
4606
+ All methods from TableCollection that do not mutate the data are reflected here.
4607
+
4608
+ To obtain a mutable copy of this table collection, use the :meth:`.copy`
4609
+ method which returns a :class:`TableCollection` instance that can be modified.
4610
+ Alternatively, use :meth:`TreeSequence.dump_tables` to get a mutable copy
4611
+ directly from the tree sequence.
4612
+
4613
+ All mutator methods present on :class:`TableCollection` (such as ``sort()``,
4614
+ ``simplify()``, ``clear()``, etc.) will raise an :class:`ImmutableTableError`
4615
+ if called on an immutable table collection.
4616
+ """
4617
+
4618
    def __init__(self, ll_tree_sequence):
        """
        Build the immutable view over ``ll_tree_sequence`` (a low-level
        TreeSequence object) and create the per-table immutable views.
        """
        # __setattr__ raises once _initialised is True, so the flag must be
        # bootstrapped via object.__setattr__ before any other assignment.
        object.__setattr__(self, "_initialised", False)
        self._llts = ll_tree_sequence
        super().__init__(ll_tree_sequence)

        # Create immutable table views - lazy initialization could be added later
        self.individuals = ImmutableIndividualTable(ll_tree_sequence)
        self.nodes = ImmutableNodeTable(ll_tree_sequence)
        self.edges = ImmutableEdgeTable(ll_tree_sequence)
        self.migrations = ImmutableMigrationTable(ll_tree_sequence)
        self.sites = ImmutableSiteTable(ll_tree_sequence)
        self.mutations = ImmutableMutationTable(ll_tree_sequence)
        self.populations = ImmutablePopulationTable(ll_tree_sequence)
        self.provenances = ImmutableProvenanceTable(ll_tree_sequence)
        # From here on, all attribute assignment is blocked.
        object.__setattr__(self, "_initialised", True)
4633
+
4634
    @property
    def sequence_length(self):
        """The sequence length, forwarded from the low-level tree sequence."""
        return self._llts.get_sequence_length()

    @property
    def file_uuid(self):
        """The file UUID, forwarded from the low-level tree sequence."""
        return self._llts.get_file_uuid()

    @property
    def time_units(self):
        """The time units string, forwarded from the low-level tree sequence."""
        return self._llts.get_time_units()

    @property
    def reference_sequence(self):
        """A ReferenceSequence wrapper around the low-level reference sequence."""
        return ReferenceSequence(self._llts.reference_sequence)

    @property
    def metadata_schema(self):
        """The top-level metadata schema, parsed from the low-level representation."""
        return metadata.parse_metadata_schema(self._llts.get_metadata_schema())

    @property
    def metadata(self):
        """The top-level metadata, decoded with :attr:`.metadata_schema`."""
        return self.metadata_schema.decode_row(self.metadata_bytes)

    @property
    def metadata_bytes(self):
        """The raw (undecoded) top-level metadata bytes."""
        return self._llts.get_metadata()
4661
+
4662
+ @property
4663
+ def table_name_map(self):
4664
+ return {
4665
+ "edges": self.edges,
4666
+ "individuals": self.individuals,
4667
+ "migrations": self.migrations,
4668
+ "mutations": self.mutations,
4669
+ "nodes": self.nodes,
4670
+ "populations": self.populations,
4671
+ "provenances": self.provenances,
4672
+ "sites": self.sites,
4673
+ }
4674
+
4675
+ @property
4676
+ def indexes(self) -> TableCollectionIndexes:
4677
+ return TableCollectionIndexes(
4678
+ **{
4679
+ "edge_insertion_order": self._llts.indexes_edge_insertion_order,
4680
+ "edge_removal_order": self._llts.indexes_edge_removal_order,
4681
+ }
4682
+ )
4683
+
4684
+ def has_index(self):
4685
+ return (
4686
+ self._llts.indexes_edge_insertion_order is not None
4687
+ and self._llts.indexes_edge_removal_order is not None
4688
+ )
4689
+
4690
+ def asdict(self, force_offset_64=False):
4691
+ # TODO Could avoid the copy here
4692
+ return self.copy().asdict(force_offset_64=force_offset_64)
4693
+
4694
+ def equals(
4695
+ self,
4696
+ other,
4697
+ *,
4698
+ ignore_metadata=False,
4699
+ ignore_ts_metadata=False,
4700
+ ignore_provenance=False,
4701
+ ignore_timestamps=False,
4702
+ ignore_tables=False,
4703
+ ignore_reference_sequence=False,
4704
+ ):
4705
+ if self is other:
4706
+ return True
4707
+ try:
4708
+ self.assert_equals(
4709
+ other,
4710
+ ignore_metadata=ignore_metadata,
4711
+ ignore_ts_metadata=ignore_ts_metadata,
4712
+ ignore_provenance=ignore_provenance,
4713
+ ignore_timestamps=ignore_timestamps,
4714
+ ignore_tables=ignore_tables,
4715
+ ignore_reference_sequence=ignore_reference_sequence,
4716
+ )
4717
+ return True
4718
+ except AssertionError:
4719
+ return False
4720
+
4721
+ def assert_equals(
4722
+ self,
4723
+ other,
4724
+ *,
4725
+ ignore_metadata=False,
4726
+ ignore_ts_metadata=False,
4727
+ ignore_provenance=False,
4728
+ ignore_timestamps=False,
4729
+ ignore_tables=False,
4730
+ ignore_reference_sequence=False,
4731
+ ):
4732
+ _assert_table_collections_equal(
4733
+ self,
4734
+ other,
4735
+ ignore_metadata=ignore_metadata,
4736
+ ignore_ts_metadata=ignore_ts_metadata,
4737
+ ignore_provenance=ignore_provenance,
4738
+ ignore_timestamps=ignore_timestamps,
4739
+ ignore_tables=ignore_tables,
4740
+ ignore_reference_sequence=ignore_reference_sequence,
4741
+ )
4742
+
4743
+ @property
4744
+ def nbytes(self):
4745
+ return sum(
4746
+ (
4747
+ 8, # sequence length
4748
+ len(self.metadata_bytes) + len(self._llts.get_metadata_schema()),
4749
+ len(self.time_units.encode()),
4750
+ self.indexes.nbytes,
4751
+ self.reference_sequence.nbytes,
4752
+ sum(table.nbytes for table in self.table_name_map.values()),
4753
+ )
4754
+ )
4755
+
4756
    def __eq__(self, other):
        # Delegates to equals() with its default options (so provenance and
        # timestamps are included in the comparison).
        return self.equals(other)
4758
+
4759
+ def __str__(self):
4760
+ return "\n".join(
4761
+ [
4762
+ "ImmutableTableCollection",
4763
+ "",
4764
+ f"Sequence Length: {self.sequence_length}",
4765
+ f"Time units: {self.time_units}",
4766
+ "",
4767
+ "Individuals",
4768
+ str(self.individuals),
4769
+ "Nodes",
4770
+ str(self.nodes),
4771
+ "Edges",
4772
+ str(self.edges),
4773
+ "Sites",
4774
+ str(self.sites),
4775
+ "Mutations",
4776
+ str(self.mutations),
4777
+ "Migrations",
4778
+ str(self.migrations),
4779
+ "Populations",
4780
+ str(self.populations),
4781
+ "Provenances",
4782
+ str(self.provenances),
4783
+ ]
4784
+ )
4785
+
4786
+ def link_ancestors(self, samples, ancestors):
4787
+ """
4788
+ See :meth:`TableCollection.link_ancestors`.
4789
+ """
4790
+ samples = util.safe_np_int_cast(samples, np.int32)
4791
+ ancestors = util.safe_np_int_cast(ancestors, np.int32)
4792
+ ll_edge_table = self._llts.link_ancestors(samples, ancestors)
4793
+ return EdgeTable(ll_table=ll_edge_table)
4794
+
4795
    def map_ancestors(self, *args, **kwargs):
        """
        Deprecated alias for :meth:`link_ancestors`.
        """
        # Pure pass-through; kept for backwards compatibility.
        return self.link_ancestors(*args, **kwargs)
4800
+
4801
    # Names of TableCollection mutator methods. None of these exist on this
    # immutable view; __getattr__ intercepts them and raises
    # ImmutableTableError with a pointer to TreeSequence.dump_tables().
    _MUTATOR_METHODS = {
        "clear",
        "sort",
        "sort_individuals",
        "canonicalise",
        "compute_mutation_parents",
        "compute_mutation_times",
        "deduplicate_sites",
        "delete_sites",
        "delete_intervals",
        "keep_intervals",
        "ltrim",
        "rtrim",
        "trim",
        "shift",
        "delete_older",
        "build_index",
        "drop_index",
        "subset",
        "union",
        "ibd_segments",
        "fromdict",
        "simplify",
    }
4825
+
4826
+ def copy(self):
4827
+ ll_tables = _tskit.TableCollection(self.sequence_length)
4828
+ self._llts.dump_tables(ll_tables)
4829
+ return TableCollection(ll_tables=ll_tables)
4830
+
4831
    def dump(self, file_or_path):
        # Write via a mutable copy, which owns the file-writing code.
        return self.copy().dump(file_or_path)

    def tree_sequence(self):
        # Wrap the existing low-level object; no copy is made.
        return tskit.TreeSequence(self._llts)

    def has_reference_sequence(self):
        # Forwarded from the low-level tree sequence.
        return self._llts.has_reference_sequence()
4839
+
4840
+ def __getattr__(self, name):
4841
+ if name in self._MUTATOR_METHODS:
4842
+ raise ImmutableTableError(
4843
+ f"Cannot call {name}() on immutable table collection. "
4844
+ f"Use TreeSequence.dump_tables() for mutable copy."
4845
+ )
4846
+ raise AttributeError(
4847
+ f"'{self.__class__.__name__}' object has no attribute '{name}'"
4848
+ )
4849
+
4850
+ def __setattr__(self, name, value):
4851
+ # Allow all assignments during initialization
4852
+ if not self._initialised:
4853
+ object.__setattr__(self, name, value)
4854
+ return
4855
+ raise ImmutableTableError(
4856
+ f"Cannot set attribute '{name}' on immutable table collection. "
4857
+ f"Use TreeSequence.dump_tables() for mutable copy."
4858
+ )