tskit 1.0.1__cp314-cp314-macosx_10_15_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _tskit.cpython-314-darwin.so +0 -0
- tskit/__init__.py +92 -0
- tskit/__main__.py +4 -0
- tskit/_version.py +4 -0
- tskit/cli.py +273 -0
- tskit/combinatorics.py +1522 -0
- tskit/drawing.py +2809 -0
- tskit/exceptions.py +70 -0
- tskit/genotypes.py +410 -0
- tskit/intervals.py +601 -0
- tskit/jit/__init__.py +0 -0
- tskit/jit/numba.py +674 -0
- tskit/metadata.py +1147 -0
- tskit/provenance.py +150 -0
- tskit/provenance.schema.json +72 -0
- tskit/stats.py +165 -0
- tskit/tables.py +4858 -0
- tskit/text_formats.py +456 -0
- tskit/trees.py +11457 -0
- tskit/util.py +901 -0
- tskit/vcf.py +219 -0
- tskit-1.0.1.dist-info/METADATA +105 -0
- tskit-1.0.1.dist-info/RECORD +27 -0
- tskit-1.0.1.dist-info/WHEEL +5 -0
- tskit-1.0.1.dist-info/entry_points.txt +2 -0
- tskit-1.0.1.dist-info/licenses/LICENSE +21 -0
- tskit-1.0.1.dist-info/top_level.txt +2 -0
|
Binary file
|
tskit/__init__.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# MIT License
|
|
2
|
+
#
|
|
3
|
+
# Copyright (c) 2018-2025 Tskit Developers
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
# of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
# in the Software without restriction, including without limitation the rights
|
|
8
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
# copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
# furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
# copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
# SOFTWARE.
|
|
22
|
+
import _tskit
|
|
23
|
+
|
|
24
|
+
#: Special reserved value representing a null ID.
|
|
25
|
+
NULL = _tskit.NULL
|
|
26
|
+
|
|
27
|
+
#: Special value representing missing data in a genotype array.
|
|
28
|
+
MISSING_DATA = _tskit.MISSING_DATA
|
|
29
|
+
|
|
30
|
+
#: Node flag value indicating that it is a sample.
|
|
31
|
+
NODE_IS_SAMPLE = _tskit.NODE_IS_SAMPLE
|
|
32
|
+
|
|
33
|
+
#: Constant representing the forward direction of travel (i.e.,
|
|
34
|
+
#: increasing genomic coordinate values).
|
|
35
|
+
FORWARD = _tskit.FORWARD
|
|
36
|
+
|
|
37
|
+
#: Constant representing the reverse direction of travel (i.e.,
|
|
38
|
+
#: decreasing genomic coordinate values).
|
|
39
|
+
REVERSE = _tskit.REVERSE
|
|
40
|
+
|
|
41
|
+
#: The allele mapping where the strings "0" and "1" map to genotype
|
|
42
|
+
#: values 0 and 1.
|
|
43
|
+
ALLELES_01 = ("0", "1")
|
|
44
|
+
|
|
45
|
+
#: The allele mapping where the four nucleotides A, C, G and T map to
|
|
46
|
+
#: the genotype integers 0, 1, 2, and 3, respectively.
|
|
47
|
+
ALLELES_ACGT = ("A", "C", "G", "T")
|
|
48
|
+
|
|
49
|
+
#: Special NAN value used to indicate unknown mutation times. Since this is a
|
|
50
|
+
#: NAN value, you cannot use `==` to test for it. Use :func:`is_unknown_time` instead.
|
|
51
|
+
UNKNOWN_TIME = _tskit.UNKNOWN_TIME
|
|
52
|
+
|
|
53
|
+
#: Default value of ts.time_units
|
|
54
|
+
TIME_UNITS_UNKNOWN = _tskit.TIME_UNITS_UNKNOWN
|
|
55
|
+
|
|
56
|
+
#: Value of ts.time_units when the time dimension is uncalibrated.
|
|
57
|
+
TIME_UNITS_UNCALIBRATED = _tskit.TIME_UNITS_UNCALIBRATED
|
|
58
|
+
|
|
59
|
+
#: Options for printing to strings and HTML; modify with tskit.set_print_options.
|
|
60
|
+
_print_options = {"max_lines": 40}
|
|
61
|
+
|
|
62
|
+
TABLE_NAMES = [
|
|
63
|
+
"individuals",
|
|
64
|
+
"nodes",
|
|
65
|
+
"edges",
|
|
66
|
+
"migrations",
|
|
67
|
+
"sites",
|
|
68
|
+
"mutations",
|
|
69
|
+
"populations",
|
|
70
|
+
"provenances",
|
|
71
|
+
]
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
from tskit.provenance import __version__ # NOQA
|
|
75
|
+
from tskit.provenance import validate_provenance # NOQA
|
|
76
|
+
from tskit.trees import * # NOQA
|
|
77
|
+
from tskit.genotypes import Variant # NOQA
|
|
78
|
+
from tskit.tables import * # NOQA
|
|
79
|
+
from tskit.stats import * # NOQA
|
|
80
|
+
from tskit.combinatorics import ( # NOQA
|
|
81
|
+
all_trees,
|
|
82
|
+
all_tree_shapes,
|
|
83
|
+
all_tree_labellings,
|
|
84
|
+
TopologyCounter,
|
|
85
|
+
Rank,
|
|
86
|
+
)
|
|
87
|
+
from tskit.drawing import SVGString # NOQA
|
|
88
|
+
from tskit.exceptions import * # NOQA
|
|
89
|
+
from tskit.util import * # NOQA
|
|
90
|
+
from tskit.metadata import * # NOQA
|
|
91
|
+
from tskit.text_formats import * # NOQA
|
|
92
|
+
from tskit.intervals import RateMap # NOQA
|
tskit/__main__.py
ADDED
tskit/_version.py
ADDED
tskit/cli.py
ADDED
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
#
|
|
2
|
+
# MIT License
|
|
3
|
+
#
|
|
4
|
+
# Copyright (c) 2018-2025 Tskit Developers
|
|
5
|
+
# Copyright (c) 2015-2018 University of Oxford
|
|
6
|
+
#
|
|
7
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
8
|
+
# of this software and associated documentation files (the "Software"), to deal
|
|
9
|
+
# in the Software without restriction, including without limitation the rights
|
|
10
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
11
|
+
# copies of the Software, and to permit persons to whom the Software is
|
|
12
|
+
# furnished to do so, subject to the following conditions:
|
|
13
|
+
#
|
|
14
|
+
# The above copyright notice and this permission notice shall be included in all
|
|
15
|
+
# copies or substantial portions of the Software.
|
|
16
|
+
#
|
|
17
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
18
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
20
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
21
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
22
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
23
|
+
# SOFTWARE.
|
|
24
|
+
"""
|
|
25
|
+
Command line utilities for tskit.
|
|
26
|
+
"""
|
|
27
|
+
import argparse
|
|
28
|
+
import json
|
|
29
|
+
import os
|
|
30
|
+
import signal
|
|
31
|
+
import sys
|
|
32
|
+
|
|
33
|
+
import tskit
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def set_sigpipe_handler():
    """
    Install the default SIGPIPE action so the program is killed quietly.

    Only applies when ``os.name`` is ``"posix"``; on any other platform the
    function returns without touching signal handling.
    """
    if os.name != "posix":
        return
    # SIG_DFL restores the default disposition for SIGPIPE.
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def sys_exit(message):
    """
    Exit the program by handing ``message`` to :func:`sys.exit`.

    :param message: The value passed through to :func:`sys.exit` unchanged.
    """
    sys.exit(message)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def load_tree_sequence(path):
    """
    Load the tree sequence at ``path`` using :func:`tskit.load`.

    An ``OSError`` raised while loading is not propagated; it is reported
    through :func:`sys_exit` as a ``Load error: ...`` message instead.

    :param path: The location handed to :func:`tskit.load`.
    :return: The object returned by :func:`tskit.load`.
    """
    try:
        loaded = tskit.load(path)
    except OSError as err:
        sys_exit(f"Load error: {err}")
    else:
        return loaded
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def run_info(args):
    """
    Print the tree sequence named by ``args.tree_sequence``.

    :param args: Parsed command line arguments with a ``tree_sequence`` field.
    """
    ts = load_tree_sequence(args.tree_sequence)
    print(ts)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def run_trees(args):
    """
    Print a short report for every tree in the tree sequence.

    For each tree the index, the number of sites and the interval are
    printed, with the interval end points formatted to ``args.precision``
    decimal places. When ``args.draw`` is true a unicode drawing of the tree
    is printed as well.

    :param args: Parsed command line arguments providing ``tree_sequence``,
        ``precision`` and ``draw``.
    """
    # Positions 0 and 1 are the end points; position 2 is the precision.
    interval_template = " interval: ({0:.{2}f}, {1:.{2}f})"
    for tree in load_tree_sequence(args.tree_sequence).trees():
        print(f"tree {tree.index}:")
        print(f" num_sites: {tree.num_sites}")
        span = tree.interval
        print(interval_template.format(span.left, span.right, args.precision))
        if args.draw:
            print(tree.draw(format="unicode"))
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def run_individuals(args):
    """
    Dump the individuals of the tree sequence as text to standard output.

    :param args: Parsed command line arguments providing ``tree_sequence``
        and ``precision``.
    """
    ts = load_tree_sequence(args.tree_sequence)
    ts.dump_text(individuals=sys.stdout, precision=args.precision)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def run_nodes(args):
    """
    Dump the nodes of the tree sequence as text to standard output.

    :param args: Parsed command line arguments providing ``tree_sequence``
        and ``precision``.
    """
    ts = load_tree_sequence(args.tree_sequence)
    ts.dump_text(nodes=sys.stdout, precision=args.precision)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def run_edges(args):
    """
    Dump the edges of the tree sequence as text to standard output.

    :param args: Parsed command line arguments providing ``tree_sequence``
        and ``precision``.
    """
    ts = load_tree_sequence(args.tree_sequence)
    ts.dump_text(edges=sys.stdout, precision=args.precision)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def run_sites(args):
    """
    Dump the sites of the tree sequence as text to standard output.

    :param args: Parsed command line arguments providing ``tree_sequence``
        and ``precision``.
    """
    ts = load_tree_sequence(args.tree_sequence)
    ts.dump_text(sites=sys.stdout, precision=args.precision)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def run_mutations(args):
    """
    Dump the mutations of the tree sequence as text to standard output.

    :param args: Parsed command line arguments providing ``tree_sequence``
        and ``precision``.
    """
    ts = load_tree_sequence(args.tree_sequence)
    ts.dump_text(mutations=sys.stdout, precision=args.precision)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def run_populations(args):
    """
    Dump the populations of the tree sequence as text to standard output.

    No precision is passed to ``dump_text`` for this table.

    :param args: Parsed command line arguments providing ``tree_sequence``.
    """
    ts = load_tree_sequence(args.tree_sequence)
    ts.dump_text(populations=sys.stdout)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def run_migrations(args):
    """
    Dump the migrations of the tree sequence as text to standard output.

    :param args: Parsed command line arguments providing ``tree_sequence``
        and ``precision``.
    """
    ts = load_tree_sequence(args.tree_sequence)
    ts.dump_text(migrations=sys.stdout, precision=args.precision)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def run_provenances(args):
    """
    Print the provenances of the tree sequence.

    Without ``args.human`` the provenances are dumped as text to standard
    output. With it, each provenance record is parsed as JSON and printed
    again with a four-space indent, preceded by its id and timestamp.

    :param args: Parsed command line arguments providing ``tree_sequence``
        and ``human``.
    """
    ts = load_tree_sequence(args.tree_sequence)
    if not args.human:
        ts.dump_text(provenances=sys.stdout)
        return

    line_template = "id={}, timestamp={}, record={}"
    for prov in ts.provenances():
        parsed = json.loads(prov.record)
        pretty = line_template.format(
            prov.id, prov.timestamp, json.dumps(parsed, indent=4)
        )
        print(pretty)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def run_fasta(args):
    """
    Write the tree sequence to standard output with ``write_fasta``.

    :param args: Parsed command line arguments providing ``tree_sequence``
        and ``wrap``, the latter being passed on as ``wrap_width``.
    """
    ts = load_tree_sequence(args.tree_sequence)
    ts.write_fasta(sys.stdout, wrap_width=args.wrap)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def run_vcf(args):
    """
    Write the tree sequence to standard output with ``write_vcf``.

    :param args: Parsed command line arguments providing ``tree_sequence``,
        ``ploidy``, ``contig_id`` and ``allow_position_zero``; the last three
        are forwarded as keyword arguments of the same names.
    """
    ts = load_tree_sequence(args.tree_sequence)
    vcf_options = {
        "ploidy": args.ploidy,
        "contig_id": args.contig_id,
        "allow_position_zero": args.allow_position_zero,
    }
    ts.write_vcf(sys.stdout, **vcf_options)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def add_tree_sequence_argument(parser):
    """
    Register the positional ``tree_sequence`` argument on ``parser``.

    :param parser: The argparse parser (or subparser) to extend.
    """
    parser.add_argument(
        "tree_sequence",
        help="The tskit tree sequence file",
    )
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _non_negative_int(value):
    """Argparse ``type`` callable that accepts only integers >= 0."""
    try:
        number = int(value)
    except ValueError:
        # Keep the wording argparse itself uses for a plain ``type=int``.
        raise argparse.ArgumentTypeError(f"invalid int value: {value!r}") from None
    if number < 0:
        raise argparse.ArgumentTypeError(
            f"precision must be a non-negative integer, got {number}"
        )
    return number


def add_precision_argument(parser):
    """
    Register the ``--precision`` / ``-p`` option on ``parser``.

    The value is the number of decimal places used when printing records
    and defaults to 6. Negative values are rejected while parsing: they
    would otherwise end up in a ``.{precision}f`` format specification and
    fail with an uncaught ``ValueError`` instead of a usage error.

    :param parser: The argparse parser (or subparser) to extend.
    """
    parser.add_argument(
        "--precision",
        "-p",
        type=_non_negative_int,
        default=6,
        help="The number of decimal places to print in records",
    )
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def get_tskit_parser():
    """
    Build the argument parser for the ``python3 -m tskit`` command line.

    A subcommand is required. Every subparser takes the positional
    ``tree_sequence`` argument and stores the function implementing the
    subcommand in its ``runner`` default.

    :return: The configured top-level parser.
    :rtype: argparse.ArgumentParser
    """
    top_parser = argparse.ArgumentParser(
        prog="python3 -m tskit",
        description="Command line interface for tskit.",
    )
    top_parser.add_argument(
        "-V",
        "--version",
        action="version",
        version=f"%(prog)s {tskit.__version__}",
    )
    subparsers = top_parser.add_subparsers(dest="subcommand")
    subparsers.required = True

    def add_subcommand(name, help_text, runner, with_precision=False):
        # Shared setup: tree sequence path, optional precision, runner default.
        sub = subparsers.add_parser(name, help=help_text)
        add_tree_sequence_argument(sub)
        if with_precision:
            add_precision_argument(sub)
        sub.set_defaults(runner=runner)
        return sub

    add_subcommand(
        "info", "Print summary information about a tree sequence.", run_info
    )

    trees_parser = add_subcommand(
        "trees", "Print information about trees.", run_trees, with_precision=True
    )
    trees_parser.add_argument(
        "--draw", "-d", action="store_true", default=False, help="Draw the trees"
    )

    # The "fasta" subcommand (run_fasta, with a "--wrap"/"-w" integer option
    # defaulting to 60, "line-wrapping width for printed sequences") is
    # deliberately not registered: fasta visibility is suppressed until we
    # have a reference sequence.
    # See https://github.com/tskit-dev/tskit/issues/1888

    vcf_parser = add_subcommand(
        "vcf", "Convert the tree sequence genotypes to VCF format.", run_vcf
    )
    vcf_parser.add_argument(
        "--ploidy",
        "-P",
        type=int,
        default=None,
        help=(
            "If the tree sequence does not contain information about "
            "individuals, create them by combining adjacent samples nodes "
            "into individuals of the specified ploidy. It is an error "
            "to provide this argument if the tree sequence does contain "
            "individuals"
        ),
    )
    vcf_parser.add_argument(
        "--contig-id", "-c", type=str, default="1", help="Specify the contig id"
    )
    vcf_parser.add_argument(
        "--allow-position-zero",
        "-0",
        action="store_true",
        default=False,
        help="Allow position 0 sites",
    )

    # Table subcommands that differ only in name, help text and runner.
    uniform_table_commands = (
        ("individuals", "Output individuals in tabular format.", run_individuals),
        ("nodes", "Output nodes in tabular format.", run_nodes),
        ("edges", "Output edges in tabular format.", run_edges),
        ("sites", "Output sites in tabular format.", run_sites),
        ("mutations", "Output mutations in tabular format.", run_mutations),
    )
    for name, help_text, runner in uniform_table_commands:
        add_subcommand(name, help_text, runner, with_precision=True)

    add_subcommand(
        "populations",
        "Output population information in tabular format.",
        run_populations,
    )
    add_subcommand(
        "migrations",
        "Output migration information in tabular format.",
        run_migrations,
        with_precision=True,
    )

    provenances_parser = add_subcommand(
        "provenances",
        "Output provenance information in tabular format.",
        run_provenances,
    )
    provenances_parser.add_argument(
        "-H",
        "--human",
        action="store_true",
        help="Print out the provenances in a human readable format",
    )

    return top_parser
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def tskit_main(arg_list=None):
    """
    Run the tskit command line interface.

    Sets up SIGPIPE handling, parses the arguments and calls the ``runner``
    function that the selected subcommand stored on the parsed namespace.

    :param list arg_list: The arguments to parse. It is handed to
        ``parse_args`` unchanged, so the default of ``None`` leaves argparse
        to read the process arguments.
    """
    set_sigpipe_handler()
    parsed = get_tskit_parser().parse_args(arg_list)
    parsed.runner(parsed)
|