tskit 1.0.0b3__cp312-cp312-win_amd64.whl → 1.0.1__cp312-cp312-win_amd64.whl
This diff compares the contents of two publicly available package versions, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- _tskit.cp312-win_amd64.pyd +0 -0
- tskit/_version.py +1 -1
- tskit/drawing.py +2 -4
- tskit/genotypes.py +23 -20
- tskit/metadata.py +1 -1
- tskit/tables.py +51 -26
- tskit/text_formats.py +4 -0
- tskit/trees.py +413 -245
- tskit/util.py +6 -7
- {tskit-1.0.0b3.dist-info → tskit-1.0.1.dist-info}/METADATA +8 -8
- tskit-1.0.1.dist-info/RECORD +27 -0
- {tskit-1.0.0b3.dist-info → tskit-1.0.1.dist-info}/WHEEL +1 -1
- tskit-1.0.0b3.dist-info/RECORD +0 -27
- {tskit-1.0.0b3.dist-info → tskit-1.0.1.dist-info}/entry_points.txt +0 -0
- {tskit-1.0.0b3.dist-info → tskit-1.0.1.dist-info}/licenses/LICENSE +0 -0
- {tskit-1.0.0b3.dist-info → tskit-1.0.1.dist-info}/top_level.txt +0 -0
_tskit.cp312-win_amd64.pyd
CHANGED
|
Binary file
|
tskit/_version.py
CHANGED
tskit/drawing.py
CHANGED
|
@@ -31,10 +31,8 @@ import numbers
|
|
|
31
31
|
import operator
|
|
32
32
|
import warnings
|
|
33
33
|
import xml.dom.minidom
|
|
34
|
+
from collections.abc import Mapping
|
|
34
35
|
from dataclasses import dataclass
|
|
35
|
-
from typing import List
|
|
36
|
-
from typing import Mapping
|
|
37
|
-
from typing import Union
|
|
38
36
|
|
|
39
37
|
import numpy as np
|
|
40
38
|
|
|
@@ -538,7 +536,7 @@ def clip_ts(ts, x_min, x_max, max_num_trees=None):
|
|
|
538
536
|
return ts, tree_status, offsets
|
|
539
537
|
|
|
540
538
|
|
|
541
|
-
def check_y_ticks(ticks:
|
|
539
|
+
def check_y_ticks(ticks: list | Mapping | None) -> Mapping:
|
|
542
540
|
"""
|
|
543
541
|
Later we might want to implement a tick locator function, such that e.g. ticks=5
|
|
544
542
|
selects ~5 nicely spaced tick locations (with sensible behaviour for log scales)
|
tskit/genotypes.py
CHANGED
|
@@ -38,12 +38,13 @@ import tskit.util as util
|
|
|
38
38
|
class Variant:
|
|
39
39
|
"""
|
|
40
40
|
A variant in a tree sequence, describing the observed genetic variation
|
|
41
|
-
among
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
41
|
+
among the specified nodes (by default, the sample nodes) for a given site.
|
|
42
|
+
A variant consists of (a) a tuple of **alleles** listing the potential
|
|
43
|
+
allelic states which the requested nodes at this site can possess; (b) an
|
|
44
|
+
array of **genotypes** mapping node IDs to the observed alleles; (c) a
|
|
45
|
+
reference to the :class:`Site` at which the Variant has been decoded; and
|
|
46
|
+
(d) an array of **samples** giving the node ID to which each element of the
|
|
47
|
+
genotypes array corresponds.
|
|
47
48
|
|
|
48
49
|
After creation a Variant is not yet decoded, and has no genotypes.
|
|
49
50
|
To decode a Variant, call the :meth:`decode` method. The Variant class will then
|
|
@@ -72,12 +73,13 @@ class Variant:
|
|
|
72
73
|
In this case, there is no indication of which allele is the ancestral state,
|
|
73
74
|
as the ordering is determined by the user.
|
|
74
75
|
|
|
75
|
-
The ``genotypes`` represent the observed allelic states for each
|
|
76
|
-
such that ``var.alleles[var.genotypes[j]]`` gives the string allele
|
|
77
|
-
for
|
|
76
|
+
The ``genotypes`` represent the observed allelic states for each requested
|
|
77
|
+
node, such that ``var.alleles[var.genotypes[j]]`` gives the string allele
|
|
78
|
+
for the node at index ``j`` (i.e., for ``variant.samples[j]``). Thus, the
|
|
79
|
+
elements of the genotypes array are
|
|
78
80
|
indexes into the ``alleles`` list. The genotypes are provided in this
|
|
79
81
|
way via a numpy numeric array to enable efficient calculations. To obtain a
|
|
80
|
-
(less efficient) array of allele strings for each
|
|
82
|
+
(less efficient) array of allele strings for each node, you can use e.g.
|
|
81
83
|
``np.asarray(variant.alleles)[variant.genotypes]``.
|
|
82
84
|
|
|
83
85
|
When :ref:`missing data<sec_data_model_missing_data>` is present at a given
|
|
@@ -95,10 +97,11 @@ class Variant:
|
|
|
95
97
|
:param TreeSequence tree_sequence: The tree sequence to which this variant
|
|
96
98
|
belongs.
|
|
97
99
|
:param array_like samples: An array of node IDs for which to generate
|
|
98
|
-
genotypes, or None for all sample nodes.
|
|
100
|
+
genotypes, or ``None`` for all sample nodes. Non-sample nodes may also
|
|
101
|
+
be provided to generate genotypes for internal nodes. Default: ``None``.
|
|
99
102
|
:param bool isolated_as_missing: If True, the genotype value assigned to
|
|
100
|
-
|
|
101
|
-
:data:`.MISSING_DATA` (-1). If False,
|
|
103
|
+
isolated nodes without mutations (samples or non-samples) is
|
|
104
|
+
:data:`.MISSING_DATA` (-1). If False, such nodes will be
|
|
102
105
|
assigned the allele index for the ancestral state.
|
|
103
106
|
Default: True.
|
|
104
107
|
:param tuple alleles: A tuple of strings defining the encoding of
|
|
@@ -143,7 +146,7 @@ class Variant:
|
|
|
143
146
|
@property
|
|
144
147
|
def alleles(self) -> tuple[str | None, ...]:
|
|
145
148
|
"""
|
|
146
|
-
A tuple of the allelic values which
|
|
149
|
+
A tuple of the allelic values which nodes can possess at the current
|
|
147
150
|
site. Unless an encoding of alleles is specified when creating this
|
|
148
151
|
variant instance, the first element of this tuple is always the site's
|
|
149
152
|
ancestral state.
|
|
@@ -162,7 +165,7 @@ class Variant:
|
|
|
162
165
|
def genotypes(self) -> np.ndarray:
|
|
163
166
|
"""
|
|
164
167
|
An array of indexes into the list ``alleles``, giving the
|
|
165
|
-
state of each
|
|
168
|
+
state of each requested node at the current site.
|
|
166
169
|
"""
|
|
167
170
|
self._check_decoded()
|
|
168
171
|
return self._ll_variant.genotypes
|
|
@@ -170,8 +173,8 @@ class Variant:
|
|
|
170
173
|
@property
|
|
171
174
|
def isolated_as_missing(self) -> bool:
|
|
172
175
|
"""
|
|
173
|
-
True if isolated
|
|
174
|
-
|
|
176
|
+
True if isolated nodes are decoded to missing data. If False, isolated
|
|
177
|
+
nodes are decoded to the ancestral state.
|
|
175
178
|
"""
|
|
176
179
|
return self._ll_variant.isolated_as_missing
|
|
177
180
|
|
|
@@ -179,7 +182,7 @@ class Variant:
|
|
|
179
182
|
def has_missing_data(self) -> bool:
|
|
180
183
|
"""
|
|
181
184
|
True if there is missing data for any of the
|
|
182
|
-
|
|
185
|
+
requested nodes at the current site.
|
|
183
186
|
"""
|
|
184
187
|
alleles = self._ll_variant.alleles
|
|
185
188
|
return len(alleles) > 0 and alleles[-1] is None
|
|
@@ -187,7 +190,7 @@ class Variant:
|
|
|
187
190
|
@property
|
|
188
191
|
def num_missing(self) -> int:
|
|
189
192
|
"""
|
|
190
|
-
The number of
|
|
193
|
+
The number of requested nodes with missing data at this site.
|
|
191
194
|
"""
|
|
192
195
|
return np.sum(self.genotypes == tskit.NULL)
|
|
193
196
|
|
|
@@ -199,7 +202,7 @@ class Variant:
|
|
|
199
202
|
array: firstly missing data is not counted as an allele, and secondly,
|
|
200
203
|
the site may contain mutations to alternative allele states (which are
|
|
201
204
|
counted in the number of alleles) without the mutation being inherited
|
|
202
|
-
by any of the
|
|
205
|
+
by any of the requested nodes.
|
|
203
206
|
"""
|
|
204
207
|
return len(self.alleles) - self.has_missing_data
|
|
205
208
|
|
tskit/metadata.py
CHANGED
tskit/tables.py
CHANGED
|
@@ -32,9 +32,6 @@ import numbers
|
|
|
32
32
|
import operator
|
|
33
33
|
import warnings
|
|
34
34
|
from dataclasses import dataclass
|
|
35
|
-
from typing import Dict
|
|
36
|
-
from typing import Optional
|
|
37
|
-
from typing import Union
|
|
38
35
|
|
|
39
36
|
import numpy as np
|
|
40
37
|
|
|
@@ -84,7 +81,7 @@ class IndividualTableRow(util.Dataclass):
|
|
|
84
81
|
"""
|
|
85
82
|
See :attr:`Individual.parents`
|
|
86
83
|
"""
|
|
87
|
-
metadata:
|
|
84
|
+
metadata: bytes | dict | None
|
|
88
85
|
"""
|
|
89
86
|
See :attr:`Individual.metadata`
|
|
90
87
|
"""
|
|
@@ -124,7 +121,7 @@ class NodeTableRow(util.Dataclass):
|
|
|
124
121
|
"""
|
|
125
122
|
See :attr:`Node.individual`
|
|
126
123
|
"""
|
|
127
|
-
metadata:
|
|
124
|
+
metadata: bytes | dict | None
|
|
128
125
|
"""
|
|
129
126
|
See :attr:`Node.metadata`
|
|
130
127
|
"""
|
|
@@ -154,7 +151,7 @@ class EdgeTableRow(util.Dataclass):
|
|
|
154
151
|
"""
|
|
155
152
|
See :attr:`Edge.child`
|
|
156
153
|
"""
|
|
157
|
-
metadata:
|
|
154
|
+
metadata: bytes | dict | None
|
|
158
155
|
"""
|
|
159
156
|
See :attr:`Edge.metadata`
|
|
160
157
|
"""
|
|
@@ -192,7 +189,7 @@ class MigrationTableRow(util.Dataclass):
|
|
|
192
189
|
"""
|
|
193
190
|
See :attr:`Migration.time`
|
|
194
191
|
"""
|
|
195
|
-
metadata:
|
|
192
|
+
metadata: bytes | dict | None
|
|
196
193
|
"""
|
|
197
194
|
See :attr:`Migration.metadata`
|
|
198
195
|
"""
|
|
@@ -214,7 +211,7 @@ class SiteTableRow(util.Dataclass):
|
|
|
214
211
|
"""
|
|
215
212
|
See :attr:`Site.ancestral_state`
|
|
216
213
|
"""
|
|
217
|
-
metadata:
|
|
214
|
+
metadata: bytes | dict | None
|
|
218
215
|
"""
|
|
219
216
|
See :attr:`Site.metadata`
|
|
220
217
|
"""
|
|
@@ -244,7 +241,7 @@ class MutationTableRow(util.Dataclass):
|
|
|
244
241
|
"""
|
|
245
242
|
See :attr:`Mutation.parent`
|
|
246
243
|
"""
|
|
247
|
-
metadata:
|
|
244
|
+
metadata: bytes | dict | None
|
|
248
245
|
"""
|
|
249
246
|
See :attr:`Mutation.metadata`
|
|
250
247
|
"""
|
|
@@ -279,7 +276,7 @@ class PopulationTableRow(util.Dataclass):
|
|
|
279
276
|
"""
|
|
280
277
|
|
|
281
278
|
__slots__ = ["metadata"]
|
|
282
|
-
metadata:
|
|
279
|
+
metadata: bytes | dict | None
|
|
283
280
|
"""
|
|
284
281
|
See :attr:`Population.metadata`
|
|
285
282
|
"""
|
|
@@ -2737,8 +2734,8 @@ class ProvenanceTable(MutableBaseTable):
|
|
|
2737
2734
|
@dataclasses.dataclass(eq=True, order=True)
|
|
2738
2735
|
class IdentitySegment:
|
|
2739
2736
|
"""
|
|
2740
|
-
A single segment of identity spanning a genomic interval
|
|
2741
|
-
a specific ancestor node.
|
|
2737
|
+
A single segment of identity by descent spanning a genomic interval
|
|
2738
|
+
for a specific ancestor node.
|
|
2742
2739
|
"""
|
|
2743
2740
|
|
|
2744
2741
|
left: float
|
|
@@ -2758,7 +2755,7 @@ class IdentitySegment:
|
|
|
2758
2755
|
|
|
2759
2756
|
class IdentitySegmentList(collections.abc.Iterable, collections.abc.Sized):
|
|
2760
2757
|
"""
|
|
2761
|
-
A summary of identity segments for some pair of samples in a
|
|
2758
|
+
A summary of identity-by-descent segments for some pair of samples in a
|
|
2762
2759
|
:class:`.IdentitySegments` result. If the ``store_segments`` argument
|
|
2763
2760
|
has been specified to :meth:`.TreeSequence.ibd_segments`, this class
|
|
2764
2761
|
can be treated as a sequence of :class:`.IdentitySegment` objects.
|
|
@@ -2769,7 +2766,9 @@ class IdentitySegmentList(collections.abc.Iterable, collections.abc.Sized):
|
|
|
2769
2766
|
|
|
2770
2767
|
If ``store_segments`` is False, only the overall summary values
|
|
2771
2768
|
such as :attr:`.IdentitySegmentList.total_span` and ``len()`` are
|
|
2772
|
-
available.
|
|
2769
|
+
available. Attempting to iterate over the list or access per-segment
|
|
2770
|
+
arrays (``left``, ``right``, or ``node``) in this case will raise an
|
|
2771
|
+
``IdentitySegmentsNotStoredError``.
|
|
2773
2772
|
|
|
2774
2773
|
.. warning:: The order of segments within an IdentitySegmentList is
|
|
2775
2774
|
arbitrary and may change in the future
|
|
@@ -2833,7 +2832,7 @@ class IdentitySegmentList(collections.abc.Iterable, collections.abc.Sized):
|
|
|
2833
2832
|
class IdentitySegments(collections.abc.Mapping):
|
|
2834
2833
|
"""
|
|
2835
2834
|
A class summarising and optionally storing the segments of identity
|
|
2836
|
-
by
|
|
2835
|
+
by descent returned by :meth:`.TreeSequence.ibd_segments`. See the
|
|
2837
2836
|
:ref:`sec_identity` for more information and examples.
|
|
2838
2837
|
|
|
2839
2838
|
Along with the documented methods and attributes, the class supports
|
|
@@ -2845,9 +2844,10 @@ class IdentitySegments(collections.abc.Mapping):
|
|
|
2845
2844
|
for a given instance of this class are determined by the
|
|
2846
2845
|
``store_pairs`` and ``store_segments`` arguments provided to
|
|
2847
2846
|
:meth:`.TreeSequence.ibd_segments`. For example, attempting
|
|
2848
|
-
to access per-sample pair information
|
|
2849
|
-
|
|
2850
|
-
|
|
2847
|
+
to access per-sample pair information (such as indexing with
|
|
2848
|
+
``[(a, b)]``, iterating over the mapping, or accessing
|
|
2849
|
+
:attr:`.IdentitySegments.pairs`) if ``store_pairs`` is False will
|
|
2850
|
+
result in an ``IdentityPairsNotStoredError`` being raised.
|
|
2851
2851
|
|
|
2852
2852
|
.. warning:: This class should not be instantiated directly.
|
|
2853
2853
|
"""
|
|
@@ -3244,7 +3244,7 @@ class TableCollection(metadata.MetadataProvider):
|
|
|
3244
3244
|
return self._ll_tables.asdict(force_offset_64)
|
|
3245
3245
|
|
|
3246
3246
|
@property
|
|
3247
|
-
def table_name_map(self) ->
|
|
3247
|
+
def table_name_map(self) -> dict:
|
|
3248
3248
|
"""
|
|
3249
3249
|
Returns a dictionary mapping table names to the corresponding
|
|
3250
3250
|
table instances. For example, the returned dictionary will contain the
|
|
@@ -3262,7 +3262,7 @@ class TableCollection(metadata.MetadataProvider):
|
|
|
3262
3262
|
}
|
|
3263
3263
|
|
|
3264
3264
|
@property
|
|
3265
|
-
def name_map(self) ->
|
|
3265
|
+
def name_map(self) -> dict:
|
|
3266
3266
|
# Deprecated in 0.4.1
|
|
3267
3267
|
warnings.warn(
|
|
3268
3268
|
"name_map is deprecated; use table_name_map instead",
|
|
@@ -3807,7 +3807,8 @@ class TableCollection(metadata.MetadataProvider):
|
|
|
3807
3807
|
"""
|
|
3808
3808
|
Sorts the individual table in place, so that parents come before children,
|
|
3809
3809
|
and the parent column is remapped as required. Node references to individuals
|
|
3810
|
-
are also updated.
|
|
3810
|
+
are also updated. This is a stricter order than is required for a valid tree
|
|
3811
|
+
sequence.
|
|
3811
3812
|
"""
|
|
3812
3813
|
self._ll_tables.sort_individuals()
|
|
3813
3814
|
# TODO add provenance
|
|
@@ -3816,9 +3817,11 @@ class TableCollection(metadata.MetadataProvider):
|
|
|
3816
3817
|
"""
|
|
3817
3818
|
This puts the tables in *canonical* form, imposing a stricter order on the
|
|
3818
3819
|
tables than :ref:`required <sec_valid_tree_sequence_requirements>` for
|
|
3819
|
-
a valid tree sequence. In particular, the
|
|
3820
|
-
|
|
3821
|
-
|
|
3820
|
+
a valid tree sequence. In particular, the population table is sorted to
|
|
3821
|
+
place populations with the lowest node IDs first, and the individual table
|
|
3822
|
+
is sorted firstly as in :meth:`.sort_individuals` and secondarily
|
|
3823
|
+
by the lowest ID of the nodes that refer to each individual
|
|
3824
|
+
(see :meth:`TreeSequence.subset`). The remaining tables are sorted
|
|
3822
3825
|
as in :meth:`.sort`, with the modification that mutations are sorted by
|
|
3823
3826
|
site, then time (if known), then the mutation's node's time, then number
|
|
3824
3827
|
of descendant mutations (ensuring that parent mutations occur before
|
|
@@ -4337,6 +4340,9 @@ class TableCollection(metadata.MetadataProvider):
|
|
|
4337
4340
|
check_shared_equality=True,
|
|
4338
4341
|
add_populations=True,
|
|
4339
4342
|
record_provenance=True,
|
|
4343
|
+
*,
|
|
4344
|
+
all_edges=False,
|
|
4345
|
+
all_mutations=False,
|
|
4340
4346
|
):
|
|
4341
4347
|
"""
|
|
4342
4348
|
Modifies the table collection in place by adding the non-shared
|
|
@@ -4358,6 +4364,10 @@ class TableCollection(metadata.MetadataProvider):
|
|
|
4358
4364
|
assigned new population IDs.
|
|
4359
4365
|
:param bool record_provenance: Whether to record a provenance entry
|
|
4360
4366
|
in the provenance table for this operation.
|
|
4367
|
+
:param bool all_edges: If True, then all edges in ``other`` are added
|
|
4368
|
+
to ``self``.
|
|
4369
|
+
:param bool all_mutations: If True, then all mutations in ``other`` are added
|
|
4370
|
+
to ``self``.
|
|
4361
4371
|
"""
|
|
4362
4372
|
node_mapping = util.safe_np_int_cast(node_mapping, np.int32)
|
|
4363
4373
|
self._ll_tables.union(
|
|
@@ -4365,6 +4375,8 @@ class TableCollection(metadata.MetadataProvider):
|
|
|
4365
4375
|
node_mapping,
|
|
4366
4376
|
check_shared_equality=check_shared_equality,
|
|
4367
4377
|
add_populations=add_populations,
|
|
4378
|
+
all_edges=all_edges,
|
|
4379
|
+
all_mutations=all_mutations,
|
|
4368
4380
|
)
|
|
4369
4381
|
if record_provenance:
|
|
4370
4382
|
other_records = [prov.record for prov in other.provenances]
|
|
@@ -4771,6 +4783,21 @@ class ImmutableTableCollection(metadata.MetadataProvider):
|
|
|
4771
4783
|
]
|
|
4772
4784
|
)
|
|
4773
4785
|
|
|
4786
|
+
def link_ancestors(self, samples, ancestors):
|
|
4787
|
+
"""
|
|
4788
|
+
See :meth:`TableCollection.link_ancestors`.
|
|
4789
|
+
"""
|
|
4790
|
+
samples = util.safe_np_int_cast(samples, np.int32)
|
|
4791
|
+
ancestors = util.safe_np_int_cast(ancestors, np.int32)
|
|
4792
|
+
ll_edge_table = self._llts.link_ancestors(samples, ancestors)
|
|
4793
|
+
return EdgeTable(ll_table=ll_edge_table)
|
|
4794
|
+
|
|
4795
|
+
def map_ancestors(self, *args, **kwargs):
|
|
4796
|
+
"""
|
|
4797
|
+
Deprecated alias for :meth:`link_ancestors`.
|
|
4798
|
+
"""
|
|
4799
|
+
return self.link_ancestors(*args, **kwargs)
|
|
4800
|
+
|
|
4774
4801
|
_MUTATOR_METHODS = {
|
|
4775
4802
|
"clear",
|
|
4776
4803
|
"sort",
|
|
@@ -4794,8 +4821,6 @@ class ImmutableTableCollection(metadata.MetadataProvider):
|
|
|
4794
4821
|
"ibd_segments",
|
|
4795
4822
|
"fromdict",
|
|
4796
4823
|
"simplify",
|
|
4797
|
-
"link_ancestors",
|
|
4798
|
-
"map_ancestors",
|
|
4799
4824
|
}
|
|
4800
4825
|
|
|
4801
4826
|
def copy(self):
|
tskit/text_formats.py
CHANGED
|
@@ -119,6 +119,7 @@ def write_nexus(
|
|
|
119
119
|
include_alignments,
|
|
120
120
|
reference_sequence,
|
|
121
121
|
missing_data_character,
|
|
122
|
+
isolated_as_missing=None,
|
|
122
123
|
):
|
|
123
124
|
# See TreeSequence.write_nexus for documentation on parameters.
|
|
124
125
|
if precision is None:
|
|
@@ -154,6 +155,7 @@ def write_nexus(
|
|
|
154
155
|
alignments = ts.alignments(
|
|
155
156
|
reference_sequence=reference_sequence,
|
|
156
157
|
missing_data_character=missing_data_character,
|
|
158
|
+
isolated_as_missing=isolated_as_missing,
|
|
157
159
|
)
|
|
158
160
|
for u, alignment in zip(ts.samples(), alignments):
|
|
159
161
|
print(2 * indent, f"n{u}", " ", alignment, sep="", file=out)
|
|
@@ -196,6 +198,7 @@ def write_fasta(
|
|
|
196
198
|
wrap_width,
|
|
197
199
|
reference_sequence,
|
|
198
200
|
missing_data_character,
|
|
201
|
+
isolated_as_missing=None,
|
|
199
202
|
):
|
|
200
203
|
# See TreeSequence.write_fasta for documentation
|
|
201
204
|
if wrap_width < 0 or int(wrap_width) != wrap_width:
|
|
@@ -208,6 +211,7 @@ def write_fasta(
|
|
|
208
211
|
alignments = ts.alignments(
|
|
209
212
|
reference_sequence=reference_sequence,
|
|
210
213
|
missing_data_character=missing_data_character,
|
|
214
|
+
isolated_as_missing=isolated_as_missing,
|
|
211
215
|
)
|
|
212
216
|
for u, alignment in zip(ts.samples(), alignments):
|
|
213
217
|
print(">", f"n{u}", sep="", file=output)
|