tskit 1.0.0b2__cp312-cp312-win_amd64.whl → 1.0.1__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
tskit/_version.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # Definitive location for the version number.
2
2
  # During development, should be x.y.z.devN
3
3
  # For beta should be x.y.zbN
4
- tskit_version = "1.0.0b2"
4
+ tskit_version = "1.0.1"
tskit/combinatorics.py CHANGED
@@ -541,6 +541,12 @@ class TopologyCounter:
541
541
  k = TopologyCounter._to_key(sample_set_indexes)
542
542
  self.topologies[k] = counter
543
543
 
544
+ def __iter__(self):
545
+ raise TypeError(
546
+ "TopologyCounter object is not iterable, "
547
+ "iterate over '.topologies' instead"
548
+ )
549
+
544
550
  @staticmethod
545
551
  def _to_key(sample_set_indexes):
546
552
  if not isinstance(sample_set_indexes, collections.abc.Iterable):
tskit/drawing.py CHANGED
@@ -31,10 +31,8 @@ import numbers
31
31
  import operator
32
32
  import warnings
33
33
  import xml.dom.minidom
34
+ from collections.abc import Mapping
34
35
  from dataclasses import dataclass
35
- from typing import List
36
- from typing import Mapping
37
- from typing import Union
38
36
 
39
37
  import numpy as np
40
38
 
@@ -538,7 +536,7 @@ def clip_ts(ts, x_min, x_max, max_num_trees=None):
538
536
  return ts, tree_status, offsets
539
537
 
540
538
 
541
- def check_y_ticks(ticks: Union[List, Mapping, None]) -> Mapping:
539
+ def check_y_ticks(ticks: list | Mapping | None) -> Mapping:
542
540
  """
543
541
  Later we might want to implement a tick locator function, such that e.g. ticks=5
544
542
  selects ~5 nicely spaced tick locations (with sensible behaviour for log scales)
tskit/exceptions.py CHANGED
@@ -60,3 +60,11 @@ class MetadataEncodingError(TskitException):
60
60
  """
61
61
  A metadata object was of a type that could not be encoded
62
62
  """
63
+
64
+
65
+ class ImmutableTableError(ValueError):
66
+ """
67
+ Raised when attempting to modify an immutable table view.
68
+
69
+ Use TreeSequence.dump_tables() to get a mutable copy.
70
+ """
tskit/genotypes.py CHANGED
@@ -38,12 +38,13 @@ import tskit.util as util
38
38
  class Variant:
39
39
  """
40
40
  A variant in a tree sequence, describing the observed genetic variation
41
- among samples for a given site. A variant consists of (a) a tuple of
42
- **alleles** listing the potential allelic states which samples at this site
43
- can possess; (b) an array of **genotypes** mapping sample IDs to the observed
44
- alleles (c) a reference to the :class:`Site` at which the Variant has been decoded
45
- and (d) an array of **samples** giving the node id to which the each element of
46
- the genotypes array corresponds.
41
+ among the specified nodes (by default, the sample nodes) for a given site.
42
+ A variant consists of (a) a tuple of **alleles** listing the potential
43
+ allelic states which the requested nodes at this site can possess; (b) an
44
+ array of **genotypes** mapping node IDs to the observed alleles; (c) a
45
+ reference to the :class:`Site` at which the Variant has been decoded; and
46
+ (d) an array of **samples** giving the node ID to which each element of the
47
+ genotypes array corresponds.
47
48
 
48
49
  After creation a Variant is not yet decoded, and has no genotypes.
49
50
  To decode a Variant, call the :meth:`decode` method. The Variant class will then
@@ -72,12 +73,13 @@ class Variant:
72
73
  In this case, there is no indication of which allele is the ancestral state,
73
74
  as the ordering is determined by the user.
74
75
 
75
- The ``genotypes`` represent the observed allelic states for each sample,
76
- such that ``var.alleles[var.genotypes[j]]`` gives the string allele
77
- for sample ID ``j``. Thus, the elements of the genotypes array are
76
+ The ``genotypes`` represent the observed allelic states for each requested
77
+ node, such that ``var.alleles[var.genotypes[j]]`` gives the string allele
78
+ for the node at index ``j`` (i.e., for ``variant.samples[j]``). Thus, the
79
+ elements of the genotypes array are
78
80
  indexes into the ``alleles`` list. The genotypes are provided in this
79
81
  way via a numpy numeric array to enable efficient calculations. To obtain a
80
- (less efficient) array of allele strings for each sample, you can use e.g.
82
+ (less efficient) array of allele strings for each node, you can use e.g.
81
83
  ``np.asarray(variant.alleles)[variant.genotypes]``.
82
84
 
83
85
  When :ref:`missing data<sec_data_model_missing_data>` is present at a given
@@ -95,10 +97,11 @@ class Variant:
95
97
  :param TreeSequence tree_sequence: The tree sequence to which this variant
96
98
  belongs.
97
99
  :param array_like samples: An array of node IDs for which to generate
98
- genotypes, or None for all sample nodes. Default: None.
100
+ genotypes, or ``None`` for all sample nodes. Non-sample nodes may also
101
+ be provided to generate genotypes for internal nodes. Default: ``None``.
99
102
  :param bool isolated_as_missing: If True, the genotype value assigned to
100
- missing samples (i.e., isolated samples without mutations) is
101
- :data:`.MISSING_DATA` (-1). If False, missing samples will be
103
+ isolated nodes without mutations (samples or non-samples) is
104
+ :data:`.MISSING_DATA` (-1). If False, such nodes will be
102
105
  assigned the allele index for the ancestral state.
103
106
  Default: True.
104
107
  :param tuple alleles: A tuple of strings defining the encoding of
@@ -143,7 +146,7 @@ class Variant:
143
146
  @property
144
147
  def alleles(self) -> tuple[str | None, ...]:
145
148
  """
146
- A tuple of the allelic values which samples can possess at the current
149
+ A tuple of the allelic values which nodes can possess at the current
147
150
  site. Unless an encoding of alleles is specified when creating this
148
151
  variant instance, the first element of this tuple is always the site's
149
152
  ancestral state.
@@ -162,7 +165,7 @@ class Variant:
162
165
  def genotypes(self) -> np.ndarray:
163
166
  """
164
167
  An array of indexes into the list ``alleles``, giving the
165
- state of each sample at the current site.
168
+ state of each requested node at the current site.
166
169
  """
167
170
  self._check_decoded()
168
171
  return self._ll_variant.genotypes
@@ -170,8 +173,8 @@ class Variant:
170
173
  @property
171
174
  def isolated_as_missing(self) -> bool:
172
175
  """
173
- True if isolated samples are decoded to missing data. If False, isolated
174
- samples are decoded to the ancestral state.
176
+ True if isolated nodes are decoded to missing data. If False, isolated
177
+ nodes are decoded to the ancestral state.
175
178
  """
176
179
  return self._ll_variant.isolated_as_missing
177
180
 
@@ -179,7 +182,7 @@ class Variant:
179
182
  def has_missing_data(self) -> bool:
180
183
  """
181
184
  True if there is missing data for any of the
182
- samples at the current site.
185
+ requested nodes at the current site.
183
186
  """
184
187
  alleles = self._ll_variant.alleles
185
188
  return len(alleles) > 0 and alleles[-1] is None
@@ -187,7 +190,7 @@ class Variant:
187
190
  @property
188
191
  def num_missing(self) -> int:
189
192
  """
190
- The number of samples with missing data at this site.
193
+ The number of requested nodes with missing data at this site.
191
194
  """
192
195
  return np.sum(self.genotypes == tskit.NULL)
193
196
 
@@ -199,7 +202,7 @@ class Variant:
199
202
  array: firstly missing data is not counted as an allele, and secondly,
200
203
  the site may contain mutations to alternative allele states (which are
201
204
  counted in the number of alleles) without the mutation being inherited
202
- by any of the samples.
205
+ by any of the requested nodes.
203
206
  """
204
207
  return len(self.alleles) - self.has_missing_data
205
208
 
tskit/metadata.py CHANGED
@@ -33,15 +33,16 @@ import json
33
33
  import pprint
34
34
  import struct
35
35
  import types
36
+ from collections.abc import Mapping
36
37
  from itertools import islice
37
38
  from typing import Any
38
- from typing import Mapping
39
39
 
40
40
  import jsonschema
41
41
  import numpy as np
42
42
 
43
43
  import tskit
44
44
  import tskit.exceptions as exceptions
45
+ import tskit.util as util
45
46
 
46
47
  __builtins__object__setattr__ = builtins.object.__setattr__
47
48
 
@@ -1041,3 +1042,106 @@ class MetadataProvider:
1041
1042
  raise AssertionError(
1042
1043
  f"Metadata differs: self={self.metadata} " f"other={other.metadata}"
1043
1044
  )
1045
+
1046
+
1047
+ NOTSET = object() # Sentinel for unset default values
1048
+
1049
+
1050
+ class TableMetadataReader:
1051
+ # Mixin for table classes that expose decoded metadata
1052
+
1053
+ @property
1054
+ def metadata_schema(self) -> MetadataSchema:
1055
+ """
1056
+ The :class:`tskit.MetadataSchema` for this table.
1057
+ """
1058
+ # This isn't as inefficient as it looks because we're using an LRU cache on
1059
+ # the parse_metadata_schema function. Thus, we're really only incurring the
1060
+ # cost of creating the unicode string from the low-level schema and looking
1061
+ # up the functools cache.
1062
+ return parse_metadata_schema(self.ll_table.metadata_schema)
1063
+
1064
+ def metadata_vector(self, key, *, dtype=None, default_value=NOTSET):
1065
+ """
1066
+ Returns a numpy array of metadata values obtained by extracting ``key``
1067
+ from each metadata entry, and using ``default_value`` if the key is
1068
+ not present. ``key`` may be a list, in which case nested values are returned.
1069
+ For instance, ``key = ["a", "x"]`` will return an array of
1070
+ ``row.metadata["a"]["x"]`` values, iterated over rows in this table.
1071
+
1072
+ :param str key: The name, or a list of names, of metadata entries.
1073
+ :param str dtype: The dtype of the result (can usually be omitted).
1074
+ :param object default_value: The value to be inserted if the metadata key
1075
+ is not present. Note that for numeric columns, a default value of None
1076
+ will result in a non-numeric array. The default behaviour is to raise
1077
+ ``KeyError`` on missing entries.
1078
+ """
1079
+ from collections.abc import Mapping
1080
+
1081
+ if default_value == NOTSET:
1082
+
1083
+ def getter(d, k):
1084
+ return d[k]
1085
+
1086
+ else:
1087
+
1088
+ def getter(d, k):
1089
+ return (
1090
+ d.get(k, default_value) if isinstance(d, Mapping) else default_value
1091
+ )
1092
+
1093
+ if isinstance(key, list):
1094
+ out = np.array(
1095
+ [functools.reduce(getter, key, row.metadata) for row in self],
1096
+ dtype=dtype,
1097
+ )
1098
+ else:
1099
+ out = np.array(
1100
+ [getter(row.metadata, key) for row in self],
1101
+ dtype=dtype,
1102
+ )
1103
+ return out
1104
+
1105
+ def _make_row(self, *args):
1106
+ return self.row_class(*args, metadata_decoder=self.metadata_schema.decode_row)
1107
+
1108
+
1109
+ class TableMetadataWriter(TableMetadataReader):
1110
+ # Mixin for tables writing metadata
1111
+
1112
+ @TableMetadataReader.metadata_schema.setter
1113
+ def metadata_schema(self, schema: MetadataSchema) -> None:
1114
+ if not isinstance(schema, MetadataSchema):
1115
+ raise TypeError(
1116
+ "Only instances of tskit.MetadataSchema can be assigned to "
1117
+ f"metadata_schema, not {type(schema)}"
1118
+ )
1119
+ self.ll_table.metadata_schema = repr(schema)
1120
+
1121
+ def packset_metadata(self, metadatas):
1122
+ """
1123
+ Packs the specified list of metadata values and updates the ``metadata``
1124
+ and ``metadata_offset`` columns. The length of the metadatas array
1125
+ must be equal to the number of rows in the table.
1126
+
1127
+ :param list metadatas: A list of metadata bytes values.
1128
+ """
1129
+ packed, offset = util.pack_bytes(metadatas)
1130
+ data = self.asdict()
1131
+ data["metadata"] = packed
1132
+ data["metadata_offset"] = offset
1133
+ self.set_columns(**data)
1134
+
1135
+ def drop_metadata(self, *, keep_schema=False):
1136
+ """
1137
+ Drops all metadata in this table. By default, the schema is also cleared,
1138
+ except if ``keep_schema`` is True.
1139
+
1140
+ :param bool keep_schema: True if the current schema should be kept intact.
1141
+ """
1142
+ data = self.asdict()
1143
+ data["metadata"] = []
1144
+ data["metadata_offset"][:] = 0
1145
+ self.set_columns(**data)
1146
+ if not keep_schema:
1147
+ self.metadata_schema = MetadataSchema.null()