tskit 1.0.0b3__cp310-cp310-win_amd64.whl → 1.0.1__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
tskit/_version.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # Definitive location for the version number.
2
2
  # During development, should be x.y.z.devN
3
3
  # For beta should be x.y.zbN
4
- tskit_version = "1.0.0b3"
4
+ tskit_version = "1.0.1"
tskit/drawing.py CHANGED
@@ -31,10 +31,8 @@ import numbers
31
31
  import operator
32
32
  import warnings
33
33
  import xml.dom.minidom
34
+ from collections.abc import Mapping
34
35
  from dataclasses import dataclass
35
- from typing import List
36
- from typing import Mapping
37
- from typing import Union
38
36
 
39
37
  import numpy as np
40
38
 
@@ -538,7 +536,7 @@ def clip_ts(ts, x_min, x_max, max_num_trees=None):
538
536
  return ts, tree_status, offsets
539
537
 
540
538
 
541
- def check_y_ticks(ticks: Union[List, Mapping, None]) -> Mapping:
539
+ def check_y_ticks(ticks: list | Mapping | None) -> Mapping:
542
540
  """
543
541
  Later we might want to implement a tick locator function, such that e.g. ticks=5
544
542
  selects ~5 nicely spaced tick locations (with sensible behaviour for log scales)
tskit/genotypes.py CHANGED
@@ -38,12 +38,13 @@ import tskit.util as util
38
38
  class Variant:
39
39
  """
40
40
  A variant in a tree sequence, describing the observed genetic variation
41
- among samples for a given site. A variant consists of (a) a tuple of
42
- **alleles** listing the potential allelic states which samples at this site
43
- can possess; (b) an array of **genotypes** mapping sample IDs to the observed
44
- alleles (c) a reference to the :class:`Site` at which the Variant has been decoded
45
- and (d) an array of **samples** giving the node id to which the each element of
46
- the genotypes array corresponds.
41
+ among the specified nodes (by default, the sample nodes) for a given site.
42
+ A variant consists of (a) a tuple of **alleles** listing the potential
43
+ allelic states which the requested nodes at this site can possess; (b) an
44
+ array of **genotypes** mapping node IDs to the observed alleles; (c) a
45
+ reference to the :class:`Site` at which the Variant has been decoded; and
46
+ (d) an array of **samples** giving the node ID to which each element of the
47
+ genotypes array corresponds.
47
48
 
48
49
  After creation a Variant is not yet decoded, and has no genotypes.
49
50
  To decode a Variant, call the :meth:`decode` method. The Variant class will then
@@ -72,12 +73,13 @@ class Variant:
72
73
  In this case, there is no indication of which allele is the ancestral state,
73
74
  as the ordering is determined by the user.
74
75
 
75
- The ``genotypes`` represent the observed allelic states for each sample,
76
- such that ``var.alleles[var.genotypes[j]]`` gives the string allele
77
- for sample ID ``j``. Thus, the elements of the genotypes array are
76
+ The ``genotypes`` represent the observed allelic states for each requested
77
+ node, such that ``var.alleles[var.genotypes[j]]`` gives the string allele
78
+ for the node at index ``j`` (i.e., for ``variant.samples[j]``). Thus, the
79
+ elements of the genotypes array are
78
80
  indexes into the ``alleles`` list. The genotypes are provided in this
79
81
  way via a numpy numeric array to enable efficient calculations. To obtain a
80
- (less efficient) array of allele strings for each sample, you can use e.g.
82
+ (less efficient) array of allele strings for each node, you can use e.g.
81
83
  ``np.asarray(variant.alleles)[variant.genotypes]``.
82
84
 
83
85
  When :ref:`missing data<sec_data_model_missing_data>` is present at a given
@@ -95,10 +97,11 @@ class Variant:
95
97
  :param TreeSequence tree_sequence: The tree sequence to which this variant
96
98
  belongs.
97
99
  :param array_like samples: An array of node IDs for which to generate
98
- genotypes, or None for all sample nodes. Default: None.
100
+ genotypes, or ``None`` for all sample nodes. Non-sample nodes may also
101
+ be provided to generate genotypes for internal nodes. Default: ``None``.
99
102
  :param bool isolated_as_missing: If True, the genotype value assigned to
100
- missing samples (i.e., isolated samples without mutations) is
101
- :data:`.MISSING_DATA` (-1). If False, missing samples will be
103
+ isolated nodes without mutations (samples or non-samples) is
104
+ :data:`.MISSING_DATA` (-1). If False, such nodes will be
102
105
  assigned the allele index for the ancestral state.
103
106
  Default: True.
104
107
  :param tuple alleles: A tuple of strings defining the encoding of
@@ -143,7 +146,7 @@ class Variant:
143
146
  @property
144
147
  def alleles(self) -> tuple[str | None, ...]:
145
148
  """
146
- A tuple of the allelic values which samples can possess at the current
149
+ A tuple of the allelic values which nodes can possess at the current
147
150
  site. Unless an encoding of alleles is specified when creating this
148
151
  variant instance, the first element of this tuple is always the site's
149
152
  ancestral state.
@@ -162,7 +165,7 @@ class Variant:
162
165
  def genotypes(self) -> np.ndarray:
163
166
  """
164
167
  An array of indexes into the list ``alleles``, giving the
165
- state of each sample at the current site.
168
+ state of each requested node at the current site.
166
169
  """
167
170
  self._check_decoded()
168
171
  return self._ll_variant.genotypes
@@ -170,8 +173,8 @@ class Variant:
170
173
  @property
171
174
  def isolated_as_missing(self) -> bool:
172
175
  """
173
- True if isolated samples are decoded to missing data. If False, isolated
174
- samples are decoded to the ancestral state.
176
+ True if isolated nodes are decoded to missing data. If False, isolated
177
+ nodes are decoded to the ancestral state.
175
178
  """
176
179
  return self._ll_variant.isolated_as_missing
177
180
 
@@ -179,7 +182,7 @@ class Variant:
179
182
  def has_missing_data(self) -> bool:
180
183
  """
181
184
  True if there is missing data for any of the
182
- samples at the current site.
185
+ requested nodes at the current site.
183
186
  """
184
187
  alleles = self._ll_variant.alleles
185
188
  return len(alleles) > 0 and alleles[-1] is None
@@ -187,7 +190,7 @@ class Variant:
187
190
  @property
188
191
  def num_missing(self) -> int:
189
192
  """
190
- The number of samples with missing data at this site.
193
+ The number of requested nodes with missing data at this site.
191
194
  """
192
195
  return np.sum(self.genotypes == tskit.NULL)
193
196
 
@@ -199,7 +202,7 @@ class Variant:
199
202
  array: firstly missing data is not counted as an allele, and secondly,
200
203
  the site may contain mutations to alternative allele states (which are
201
204
  counted in the number of alleles) without the mutation being inherited
202
- by any of the samples.
205
+ by any of the requested nodes.
203
206
  """
204
207
  return len(self.alleles) - self.has_missing_data
205
208
 
tskit/metadata.py CHANGED
@@ -33,9 +33,9 @@ import json
33
33
  import pprint
34
34
  import struct
35
35
  import types
36
+ from collections.abc import Mapping
36
37
  from itertools import islice
37
38
  from typing import Any
38
- from typing import Mapping
39
39
 
40
40
  import jsonschema
41
41
  import numpy as np
tskit/tables.py CHANGED
@@ -32,9 +32,6 @@ import numbers
32
32
  import operator
33
33
  import warnings
34
34
  from dataclasses import dataclass
35
- from typing import Dict
36
- from typing import Optional
37
- from typing import Union
38
35
 
39
36
  import numpy as np
40
37
 
@@ -84,7 +81,7 @@ class IndividualTableRow(util.Dataclass):
84
81
  """
85
82
  See :attr:`Individual.parents`
86
83
  """
87
- metadata: Optional[Union[bytes, dict]]
84
+ metadata: bytes | dict | None
88
85
  """
89
86
  See :attr:`Individual.metadata`
90
87
  """
@@ -124,7 +121,7 @@ class NodeTableRow(util.Dataclass):
124
121
  """
125
122
  See :attr:`Node.individual`
126
123
  """
127
- metadata: Optional[Union[bytes, dict]]
124
+ metadata: bytes | dict | None
128
125
  """
129
126
  See :attr:`Node.metadata`
130
127
  """
@@ -154,7 +151,7 @@ class EdgeTableRow(util.Dataclass):
154
151
  """
155
152
  See :attr:`Edge.child`
156
153
  """
157
- metadata: Optional[Union[bytes, dict]]
154
+ metadata: bytes | dict | None
158
155
  """
159
156
  See :attr:`Edge.metadata`
160
157
  """
@@ -192,7 +189,7 @@ class MigrationTableRow(util.Dataclass):
192
189
  """
193
190
  See :attr:`Migration.time`
194
191
  """
195
- metadata: Optional[Union[bytes, dict]]
192
+ metadata: bytes | dict | None
196
193
  """
197
194
  See :attr:`Migration.metadata`
198
195
  """
@@ -214,7 +211,7 @@ class SiteTableRow(util.Dataclass):
214
211
  """
215
212
  See :attr:`Site.ancestral_state`
216
213
  """
217
- metadata: Optional[Union[bytes, dict]]
214
+ metadata: bytes | dict | None
218
215
  """
219
216
  See :attr:`Site.metadata`
220
217
  """
@@ -244,7 +241,7 @@ class MutationTableRow(util.Dataclass):
244
241
  """
245
242
  See :attr:`Mutation.parent`
246
243
  """
247
- metadata: Optional[Union[bytes, dict]]
244
+ metadata: bytes | dict | None
248
245
  """
249
246
  See :attr:`Mutation.metadata`
250
247
  """
@@ -279,7 +276,7 @@ class PopulationTableRow(util.Dataclass):
279
276
  """
280
277
 
281
278
  __slots__ = ["metadata"]
282
- metadata: Optional[Union[bytes, dict]]
279
+ metadata: bytes | dict | None
283
280
  """
284
281
  See :attr:`Population.metadata`
285
282
  """
@@ -2737,8 +2734,8 @@ class ProvenanceTable(MutableBaseTable):
2737
2734
  @dataclasses.dataclass(eq=True, order=True)
2738
2735
  class IdentitySegment:
2739
2736
  """
2740
- A single segment of identity spanning a genomic interval for a
2741
- a specific ancestor node.
2737
+ A single segment of identity by descent spanning a genomic interval
2738
+ for a specific ancestor node.
2742
2739
  """
2743
2740
 
2744
2741
  left: float
@@ -2758,7 +2755,7 @@ class IdentitySegment:
2758
2755
 
2759
2756
  class IdentitySegmentList(collections.abc.Iterable, collections.abc.Sized):
2760
2757
  """
2761
- A summary of identity segments for some pair of samples in a
2758
+ A summary of identity-by-descent segments for some pair of samples in a
2762
2759
  :class:`.IdentitySegments` result. If the ``store_segments`` argument
2763
2760
  has been specified to :meth:`.TreeSequence.ibd_segments`, this class
2764
2761
  can be treated as a sequence of :class:`.IdentitySegment` objects.
@@ -2769,7 +2766,9 @@ class IdentitySegmentList(collections.abc.Iterable, collections.abc.Sized):
2769
2766
 
2770
2767
  If ``store_segments`` is False, only the overall summary values
2771
2768
  such as :attr:`.IdentitySegmentList.total_span` and ``len()`` are
2772
- available.
2769
+ available. Attempting to iterate over the list or access per-segment
2770
+ arrays (``left``, ``right``, or ``node``) in this case will raise an
2771
+ ``IdentitySegmentsNotStoredError``.
2773
2772
 
2774
2773
  .. warning:: The order of segments within an IdentitySegmentList is
2775
2774
  arbitrary and may change in the future
@@ -2833,7 +2832,7 @@ class IdentitySegmentList(collections.abc.Iterable, collections.abc.Sized):
2833
2832
  class IdentitySegments(collections.abc.Mapping):
2834
2833
  """
2835
2834
  A class summarising and optionally storing the segments of identity
2836
- by state returned by :meth:`.TreeSequence.ibd_segments`. See the
2835
+ by descent returned by :meth:`.TreeSequence.ibd_segments`. See the
2837
2836
  :ref:`sec_identity` for more information and examples.
2838
2837
 
2839
2838
  Along with the documented methods and attributes, the class supports
@@ -2845,9 +2844,10 @@ class IdentitySegments(collections.abc.Mapping):
2845
2844
  for a given instance of this class are determined by the
2846
2845
  ``store_pairs`` and ``store_segments`` arguments provided to
2847
2846
  :meth:`.TreeSequence.ibd_segments`. For example, attempting
2848
- to access per-sample pair information if ``store_pairs``
2849
- is False will result in a (hopefully informative) error being
2850
- raised.
2847
+ to access per-sample pair information (such as indexing with
2848
+ ``[(a, b)]``, iterating over the mapping, or accessing
2849
+ :attr:`.IdentitySegments.pairs`) if ``store_pairs`` is False will
2850
+ result in an ``IdentityPairsNotStoredError`` being raised.
2851
2851
 
2852
2852
  .. warning:: This class should not be instantiated directly.
2853
2853
  """
@@ -3244,7 +3244,7 @@ class TableCollection(metadata.MetadataProvider):
3244
3244
  return self._ll_tables.asdict(force_offset_64)
3245
3245
 
3246
3246
  @property
3247
- def table_name_map(self) -> Dict:
3247
+ def table_name_map(self) -> dict:
3248
3248
  """
3249
3249
  Returns a dictionary mapping table names to the corresponding
3250
3250
  table instances. For example, the returned dictionary will contain the
@@ -3262,7 +3262,7 @@ class TableCollection(metadata.MetadataProvider):
3262
3262
  }
3263
3263
 
3264
3264
  @property
3265
- def name_map(self) -> Dict:
3265
+ def name_map(self) -> dict:
3266
3266
  # Deprecated in 0.4.1
3267
3267
  warnings.warn(
3268
3268
  "name_map is deprecated; use table_name_map instead",
@@ -3807,7 +3807,8 @@ class TableCollection(metadata.MetadataProvider):
3807
3807
  """
3808
3808
  Sorts the individual table in place, so that parents come before children,
3809
3809
  and the parent column is remapped as required. Node references to individuals
3810
- are also updated.
3810
+ are also updated. This is a stricter order than is required for a valid tree
3811
+ sequence.
3811
3812
  """
3812
3813
  self._ll_tables.sort_individuals()
3813
3814
  # TODO add provenance
@@ -3816,9 +3817,11 @@ class TableCollection(metadata.MetadataProvider):
3816
3817
  """
3817
3818
  This puts the tables in *canonical* form, imposing a stricter order on the
3818
3819
  tables than :ref:`required <sec_valid_tree_sequence_requirements>` for
3819
- a valid tree sequence. In particular, the individual
3820
- and population tables are sorted by the first node that refers to each
3821
- (see :meth:`TreeSequence.subset`). Then, the remaining tables are sorted
3820
+ a valid tree sequence. In particular, the population table is sorted to
3821
+ place populations with the lowest node IDs first, and the individual table
3822
+ is sorted firstly as in :meth:`.sort_individuals` and secondarily
3823
+ by the lowest ID of the nodes that refer to each individual
3824
+ (see :meth:`TreeSequence.subset`). The remaining tables are sorted
3822
3825
  as in :meth:`.sort`, with the modification that mutations are sorted by
3823
3826
  site, then time (if known), then the mutation's node's time, then number
3824
3827
  of descendant mutations (ensuring that parent mutations occur before
@@ -4337,6 +4340,9 @@ class TableCollection(metadata.MetadataProvider):
4337
4340
  check_shared_equality=True,
4338
4341
  add_populations=True,
4339
4342
  record_provenance=True,
4343
+ *,
4344
+ all_edges=False,
4345
+ all_mutations=False,
4340
4346
  ):
4341
4347
  """
4342
4348
  Modifies the table collection in place by adding the non-shared
@@ -4358,6 +4364,10 @@ class TableCollection(metadata.MetadataProvider):
4358
4364
  assigned new population IDs.
4359
4365
  :param bool record_provenance: Whether to record a provenance entry
4360
4366
  in the provenance table for this operation.
4367
+ :param bool all_edges: If True, then all edges in ``other`` are added
4368
+ to ``self``.
4369
+ :param bool all_mutations: If True, then all mutations in ``other`` are added
4370
+ to ``self``.
4361
4371
  """
4362
4372
  node_mapping = util.safe_np_int_cast(node_mapping, np.int32)
4363
4373
  self._ll_tables.union(
@@ -4365,6 +4375,8 @@ class TableCollection(metadata.MetadataProvider):
4365
4375
  node_mapping,
4366
4376
  check_shared_equality=check_shared_equality,
4367
4377
  add_populations=add_populations,
4378
+ all_edges=all_edges,
4379
+ all_mutations=all_mutations,
4368
4380
  )
4369
4381
  if record_provenance:
4370
4382
  other_records = [prov.record for prov in other.provenances]
@@ -4771,6 +4783,21 @@ class ImmutableTableCollection(metadata.MetadataProvider):
4771
4783
  ]
4772
4784
  )
4773
4785
 
4786
+ def link_ancestors(self, samples, ancestors):
4787
+ """
4788
+ See :meth:`TableCollection.link_ancestors`.
4789
+ """
4790
+ samples = util.safe_np_int_cast(samples, np.int32)
4791
+ ancestors = util.safe_np_int_cast(ancestors, np.int32)
4792
+ ll_edge_table = self._llts.link_ancestors(samples, ancestors)
4793
+ return EdgeTable(ll_table=ll_edge_table)
4794
+
4795
+ def map_ancestors(self, *args, **kwargs):
4796
+ """
4797
+ Deprecated alias for :meth:`link_ancestors`.
4798
+ """
4799
+ return self.link_ancestors(*args, **kwargs)
4800
+
4774
4801
  _MUTATOR_METHODS = {
4775
4802
  "clear",
4776
4803
  "sort",
@@ -4794,8 +4821,6 @@ class ImmutableTableCollection(metadata.MetadataProvider):
4794
4821
  "ibd_segments",
4795
4822
  "fromdict",
4796
4823
  "simplify",
4797
- "link_ancestors",
4798
- "map_ancestors",
4799
4824
  }
4800
4825
 
4801
4826
  def copy(self):
tskit/text_formats.py CHANGED
@@ -119,6 +119,7 @@ def write_nexus(
119
119
  include_alignments,
120
120
  reference_sequence,
121
121
  missing_data_character,
122
+ isolated_as_missing=None,
122
123
  ):
123
124
  # See TreeSequence.write_nexus for documentation on parameters.
124
125
  if precision is None:
@@ -154,6 +155,7 @@ def write_nexus(
154
155
  alignments = ts.alignments(
155
156
  reference_sequence=reference_sequence,
156
157
  missing_data_character=missing_data_character,
158
+ isolated_as_missing=isolated_as_missing,
157
159
  )
158
160
  for u, alignment in zip(ts.samples(), alignments):
159
161
  print(2 * indent, f"n{u}", " ", alignment, sep="", file=out)
@@ -196,6 +198,7 @@ def write_fasta(
196
198
  wrap_width,
197
199
  reference_sequence,
198
200
  missing_data_character,
201
+ isolated_as_missing=None,
199
202
  ):
200
203
  # See TreeSequence.write_fasta for documentation
201
204
  if wrap_width < 0 or int(wrap_width) != wrap_width:
@@ -208,6 +211,7 @@ def write_fasta(
208
211
  alignments = ts.alignments(
209
212
  reference_sequence=reference_sequence,
210
213
  missing_data_character=missing_data_character,
214
+ isolated_as_missing=isolated_as_missing,
211
215
  )
212
216
  for u, alignment in zip(ts.samples(), alignments):
213
217
  print(">", f"n{u}", sep="", file=output)