tskit 1.0.0b2__cp313-cp313-win_amd64.whl → 1.0.1__cp313-cp313-win_amd64.whl
This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- _tskit.cp313-win_amd64.pyd +0 -0
- tskit/_version.py +1 -1
- tskit/combinatorics.py +6 -0
- tskit/drawing.py +2 -4
- tskit/exceptions.py +8 -0
- tskit/genotypes.py +23 -20
- tskit/metadata.py +105 -1
- tskit/tables.py +1043 -493
- tskit/text_formats.py +4 -0
- tskit/trees.py +440 -261
- tskit/util.py +6 -7
- {tskit-1.0.0b2.dist-info → tskit-1.0.1.dist-info}/METADATA +8 -8
- tskit-1.0.1.dist-info/RECORD +27 -0
- {tskit-1.0.0b2.dist-info → tskit-1.0.1.dist-info}/WHEEL +1 -1
- tskit-1.0.0b2.dist-info/RECORD +0 -27
- {tskit-1.0.0b2.dist-info → tskit-1.0.1.dist-info}/entry_points.txt +0 -0
- {tskit-1.0.0b2.dist-info → tskit-1.0.1.dist-info}/licenses/LICENSE +0 -0
- {tskit-1.0.0b2.dist-info → tskit-1.0.1.dist-info}/top_level.txt +0 -0
_tskit.cp313-win_amd64.pyd
CHANGED
|
Binary file
|
tskit/_version.py
CHANGED
tskit/combinatorics.py
CHANGED
|
@@ -541,6 +541,12 @@ class TopologyCounter:
|
|
|
541
541
|
k = TopologyCounter._to_key(sample_set_indexes)
|
|
542
542
|
self.topologies[k] = counter
|
|
543
543
|
|
|
544
|
+
def __iter__(self):
|
|
545
|
+
raise TypeError(
|
|
546
|
+
"TopologyCounter object is not iterable, "
|
|
547
|
+
"iterate over '.topologies' instead"
|
|
548
|
+
)
|
|
549
|
+
|
|
544
550
|
@staticmethod
|
|
545
551
|
def _to_key(sample_set_indexes):
|
|
546
552
|
if not isinstance(sample_set_indexes, collections.abc.Iterable):
|
tskit/drawing.py
CHANGED
|
@@ -31,10 +31,8 @@ import numbers
|
|
|
31
31
|
import operator
|
|
32
32
|
import warnings
|
|
33
33
|
import xml.dom.minidom
|
|
34
|
+
from collections.abc import Mapping
|
|
34
35
|
from dataclasses import dataclass
|
|
35
|
-
from typing import List
|
|
36
|
-
from typing import Mapping
|
|
37
|
-
from typing import Union
|
|
38
36
|
|
|
39
37
|
import numpy as np
|
|
40
38
|
|
|
@@ -538,7 +536,7 @@ def clip_ts(ts, x_min, x_max, max_num_trees=None):
|
|
|
538
536
|
return ts, tree_status, offsets
|
|
539
537
|
|
|
540
538
|
|
|
541
|
-
def check_y_ticks(ticks:
|
|
539
|
+
def check_y_ticks(ticks: list | Mapping | None) -> Mapping:
|
|
542
540
|
"""
|
|
543
541
|
Later we might want to implement a tick locator function, such that e.g. ticks=5
|
|
544
542
|
selects ~5 nicely spaced tick locations (with sensible behaviour for log scales)
|
tskit/exceptions.py
CHANGED
|
@@ -60,3 +60,11 @@ class MetadataEncodingError(TskitException):
|
|
|
60
60
|
"""
|
|
61
61
|
A metadata object was of a type that could not be encoded
|
|
62
62
|
"""
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class ImmutableTableError(ValueError):
|
|
66
|
+
"""
|
|
67
|
+
Raised when attempting to modify an immutable table view.
|
|
68
|
+
|
|
69
|
+
Use TreeSequence.dump_tables() to get a mutable copy.
|
|
70
|
+
"""
|
tskit/genotypes.py
CHANGED
|
@@ -38,12 +38,13 @@ import tskit.util as util
|
|
|
38
38
|
class Variant:
|
|
39
39
|
"""
|
|
40
40
|
A variant in a tree sequence, describing the observed genetic variation
|
|
41
|
-
among
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
41
|
+
among the specified nodes (by default, the sample nodes) for a given site.
|
|
42
|
+
A variant consists of (a) a tuple of **alleles** listing the potential
|
|
43
|
+
allelic states which the requested nodes at this site can possess; (b) an
|
|
44
|
+
array of **genotypes** mapping node IDs to the observed alleles; (c) a
|
|
45
|
+
reference to the :class:`Site` at which the Variant has been decoded; and
|
|
46
|
+
(d) an array of **samples** giving the node ID to which each element of the
|
|
47
|
+
genotypes array corresponds.
|
|
47
48
|
|
|
48
49
|
After creation a Variant is not yet decoded, and has no genotypes.
|
|
49
50
|
To decode a Variant, call the :meth:`decode` method. The Variant class will then
|
|
@@ -72,12 +73,13 @@ class Variant:
|
|
|
72
73
|
In this case, there is no indication of which allele is the ancestral state,
|
|
73
74
|
as the ordering is determined by the user.
|
|
74
75
|
|
|
75
|
-
The ``genotypes`` represent the observed allelic states for each
|
|
76
|
-
such that ``var.alleles[var.genotypes[j]]`` gives the string allele
|
|
77
|
-
for
|
|
76
|
+
The ``genotypes`` represent the observed allelic states for each requested
|
|
77
|
+
node, such that ``var.alleles[var.genotypes[j]]`` gives the string allele
|
|
78
|
+
for the node at index ``j`` (i.e., for ``variant.samples[j]``). Thus, the
|
|
79
|
+
elements of the genotypes array are
|
|
78
80
|
indexes into the ``alleles`` list. The genotypes are provided in this
|
|
79
81
|
way via a numpy numeric array to enable efficient calculations. To obtain a
|
|
80
|
-
(less efficient) array of allele strings for each
|
|
82
|
+
(less efficient) array of allele strings for each node, you can use e.g.
|
|
81
83
|
``np.asarray(variant.alleles)[variant.genotypes]``.
|
|
82
84
|
|
|
83
85
|
When :ref:`missing data<sec_data_model_missing_data>` is present at a given
|
|
@@ -95,10 +97,11 @@ class Variant:
|
|
|
95
97
|
:param TreeSequence tree_sequence: The tree sequence to which this variant
|
|
96
98
|
belongs.
|
|
97
99
|
:param array_like samples: An array of node IDs for which to generate
|
|
98
|
-
genotypes, or None for all sample nodes.
|
|
100
|
+
genotypes, or ``None`` for all sample nodes. Non-sample nodes may also
|
|
101
|
+
be provided to generate genotypes for internal nodes. Default: ``None``.
|
|
99
102
|
:param bool isolated_as_missing: If True, the genotype value assigned to
|
|
100
|
-
|
|
101
|
-
:data:`.MISSING_DATA` (-1). If False,
|
|
103
|
+
isolated nodes without mutations (samples or non-samples) is
|
|
104
|
+
:data:`.MISSING_DATA` (-1). If False, such nodes will be
|
|
102
105
|
assigned the allele index for the ancestral state.
|
|
103
106
|
Default: True.
|
|
104
107
|
:param tuple alleles: A tuple of strings defining the encoding of
|
|
@@ -143,7 +146,7 @@ class Variant:
|
|
|
143
146
|
@property
|
|
144
147
|
def alleles(self) -> tuple[str | None, ...]:
|
|
145
148
|
"""
|
|
146
|
-
A tuple of the allelic values which
|
|
149
|
+
A tuple of the allelic values which nodes can possess at the current
|
|
147
150
|
site. Unless an encoding of alleles is specified when creating this
|
|
148
151
|
variant instance, the first element of this tuple is always the site's
|
|
149
152
|
ancestral state.
|
|
@@ -162,7 +165,7 @@ class Variant:
|
|
|
162
165
|
def genotypes(self) -> np.ndarray:
|
|
163
166
|
"""
|
|
164
167
|
An array of indexes into the list ``alleles``, giving the
|
|
165
|
-
state of each
|
|
168
|
+
state of each requested node at the current site.
|
|
166
169
|
"""
|
|
167
170
|
self._check_decoded()
|
|
168
171
|
return self._ll_variant.genotypes
|
|
@@ -170,8 +173,8 @@ class Variant:
|
|
|
170
173
|
@property
|
|
171
174
|
def isolated_as_missing(self) -> bool:
|
|
172
175
|
"""
|
|
173
|
-
True if isolated
|
|
174
|
-
|
|
176
|
+
True if isolated nodes are decoded to missing data. If False, isolated
|
|
177
|
+
nodes are decoded to the ancestral state.
|
|
175
178
|
"""
|
|
176
179
|
return self._ll_variant.isolated_as_missing
|
|
177
180
|
|
|
@@ -179,7 +182,7 @@ class Variant:
|
|
|
179
182
|
def has_missing_data(self) -> bool:
|
|
180
183
|
"""
|
|
181
184
|
True if there is missing data for any of the
|
|
182
|
-
|
|
185
|
+
requested nodes at the current site.
|
|
183
186
|
"""
|
|
184
187
|
alleles = self._ll_variant.alleles
|
|
185
188
|
return len(alleles) > 0 and alleles[-1] is None
|
|
@@ -187,7 +190,7 @@ class Variant:
|
|
|
187
190
|
@property
|
|
188
191
|
def num_missing(self) -> int:
|
|
189
192
|
"""
|
|
190
|
-
The number of
|
|
193
|
+
The number of requested nodes with missing data at this site.
|
|
191
194
|
"""
|
|
192
195
|
return np.sum(self.genotypes == tskit.NULL)
|
|
193
196
|
|
|
@@ -199,7 +202,7 @@ class Variant:
|
|
|
199
202
|
array: firstly missing data is not counted as an allele, and secondly,
|
|
200
203
|
the site may contain mutations to alternative allele states (which are
|
|
201
204
|
counted in the number of alleles) without the mutation being inherited
|
|
202
|
-
by any of the
|
|
205
|
+
by any of the requested nodes.
|
|
203
206
|
"""
|
|
204
207
|
return len(self.alleles) - self.has_missing_data
|
|
205
208
|
|
tskit/metadata.py
CHANGED
|
@@ -33,15 +33,16 @@ import json
|
|
|
33
33
|
import pprint
|
|
34
34
|
import struct
|
|
35
35
|
import types
|
|
36
|
+
from collections.abc import Mapping
|
|
36
37
|
from itertools import islice
|
|
37
38
|
from typing import Any
|
|
38
|
-
from typing import Mapping
|
|
39
39
|
|
|
40
40
|
import jsonschema
|
|
41
41
|
import numpy as np
|
|
42
42
|
|
|
43
43
|
import tskit
|
|
44
44
|
import tskit.exceptions as exceptions
|
|
45
|
+
import tskit.util as util
|
|
45
46
|
|
|
46
47
|
__builtins__object__setattr__ = builtins.object.__setattr__
|
|
47
48
|
|
|
@@ -1041,3 +1042,106 @@ class MetadataProvider:
|
|
|
1041
1042
|
raise AssertionError(
|
|
1042
1043
|
f"Metadata differs: self={self.metadata} " f"other={other.metadata}"
|
|
1043
1044
|
)
|
|
1045
|
+
|
|
1046
|
+
|
|
1047
|
+
NOTSET = object() # Sentinel for unset default values
|
|
1048
|
+
|
|
1049
|
+
|
|
1050
|
+
class TableMetadataReader:
|
|
1051
|
+
# Mixin for table classes that expose decoded metadata
|
|
1052
|
+
|
|
1053
|
+
@property
|
|
1054
|
+
def metadata_schema(self) -> MetadataSchema:
|
|
1055
|
+
"""
|
|
1056
|
+
The :class:`tskit.MetadataSchema` for this table.
|
|
1057
|
+
"""
|
|
1058
|
+
# This isn't as inefficient as it looks because we're using an LRU cache on
|
|
1059
|
+
# the parse_metadata_schema function. Thus, we're really only incurring the
|
|
1060
|
+
# cost of creating the unicode string from the low-level schema and looking
|
|
1061
|
+
# up the functools cache.
|
|
1062
|
+
return parse_metadata_schema(self.ll_table.metadata_schema)
|
|
1063
|
+
|
|
1064
|
+
def metadata_vector(self, key, *, dtype=None, default_value=NOTSET):
|
|
1065
|
+
"""
|
|
1066
|
+
Returns a numpy array of metadata values obtained by extracting ``key``
|
|
1067
|
+
from each metadata entry, and using ``default_value`` if the key is
|
|
1068
|
+
not present. ``key`` may be a list, in which case nested values are returned.
|
|
1069
|
+
For instance, ``key = ["a", "x"]`` will return an array of
|
|
1070
|
+
``row.metadata["a"]["x"]`` values, iterated over rows in this table.
|
|
1071
|
+
|
|
1072
|
+
:param str key: The name, or a list of names, of metadata entries.
|
|
1073
|
+
:param str dtype: The dtype of the result (can usually be omitted).
|
|
1074
|
+
:param object default_value: The value to be inserted if the metadata key
|
|
1075
|
+
is not present. Note that for numeric columns, a default value of None
|
|
1076
|
+
will result in a non-numeric array. The default behaviour is to raise
|
|
1077
|
+
``KeyError`` on missing entries.
|
|
1078
|
+
"""
|
|
1079
|
+
from collections.abc import Mapping
|
|
1080
|
+
|
|
1081
|
+
if default_value == NOTSET:
|
|
1082
|
+
|
|
1083
|
+
def getter(d, k):
|
|
1084
|
+
return d[k]
|
|
1085
|
+
|
|
1086
|
+
else:
|
|
1087
|
+
|
|
1088
|
+
def getter(d, k):
|
|
1089
|
+
return (
|
|
1090
|
+
d.get(k, default_value) if isinstance(d, Mapping) else default_value
|
|
1091
|
+
)
|
|
1092
|
+
|
|
1093
|
+
if isinstance(key, list):
|
|
1094
|
+
out = np.array(
|
|
1095
|
+
[functools.reduce(getter, key, row.metadata) for row in self],
|
|
1096
|
+
dtype=dtype,
|
|
1097
|
+
)
|
|
1098
|
+
else:
|
|
1099
|
+
out = np.array(
|
|
1100
|
+
[getter(row.metadata, key) for row in self],
|
|
1101
|
+
dtype=dtype,
|
|
1102
|
+
)
|
|
1103
|
+
return out
|
|
1104
|
+
|
|
1105
|
+
def _make_row(self, *args):
|
|
1106
|
+
return self.row_class(*args, metadata_decoder=self.metadata_schema.decode_row)
|
|
1107
|
+
|
|
1108
|
+
|
|
1109
|
+
class TableMetadataWriter(TableMetadataReader):
|
|
1110
|
+
# Mixin for tables writing metadata
|
|
1111
|
+
|
|
1112
|
+
@TableMetadataReader.metadata_schema.setter
|
|
1113
|
+
def metadata_schema(self, schema: MetadataSchema) -> None:
|
|
1114
|
+
if not isinstance(schema, MetadataSchema):
|
|
1115
|
+
raise TypeError(
|
|
1116
|
+
"Only instances of tskit.MetadataSchema can be assigned to "
|
|
1117
|
+
f"metadata_schema, not {type(schema)}"
|
|
1118
|
+
)
|
|
1119
|
+
self.ll_table.metadata_schema = repr(schema)
|
|
1120
|
+
|
|
1121
|
+
def packset_metadata(self, metadatas):
|
|
1122
|
+
"""
|
|
1123
|
+
Packs the specified list of metadata values and updates the ``metadata``
|
|
1124
|
+
and ``metadata_offset`` columns. The length of the metadatas array
|
|
1125
|
+
must be equal to the number of rows in the table.
|
|
1126
|
+
|
|
1127
|
+
:param list metadatas: A list of metadata bytes values.
|
|
1128
|
+
"""
|
|
1129
|
+
packed, offset = util.pack_bytes(metadatas)
|
|
1130
|
+
data = self.asdict()
|
|
1131
|
+
data["metadata"] = packed
|
|
1132
|
+
data["metadata_offset"] = offset
|
|
1133
|
+
self.set_columns(**data)
|
|
1134
|
+
|
|
1135
|
+
def drop_metadata(self, *, keep_schema=False):
|
|
1136
|
+
"""
|
|
1137
|
+
Drops all metadata in this table. By default, the schema is also cleared,
|
|
1138
|
+
except if ``keep_schema`` is True.
|
|
1139
|
+
|
|
1140
|
+
:param bool keep_schema: True if the current schema should be kept intact.
|
|
1141
|
+
"""
|
|
1142
|
+
data = self.asdict()
|
|
1143
|
+
data["metadata"] = []
|
|
1144
|
+
data["metadata_offset"][:] = 0
|
|
1145
|
+
self.set_columns(**data)
|
|
1146
|
+
if not keep_schema:
|
|
1147
|
+
self.metadata_schema = MetadataSchema.null()
|