tskit 1.0.0b3__tar.gz → 1.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tskit-1.0.0b3/tskit.egg-info → tskit-1.0.1}/PKG-INFO +8 -8
- {tskit-1.0.0b3 → tskit-1.0.1}/_tskitmodule.c +179 -3
- {tskit-1.0.0b3 → tskit-1.0.1}/lib/tskit/core.c +75 -52
- {tskit-1.0.0b3 → tskit-1.0.1}/lib/tskit/core.h +50 -24
- {tskit-1.0.0b3 → tskit-1.0.1}/lib/tskit/genotypes.c +231 -8
- {tskit-1.0.0b3 → tskit-1.0.1}/lib/tskit/tables.c +47 -4
- {tskit-1.0.0b3 → tskit-1.0.1}/lib/tskit/tables.h +15 -1
- {tskit-1.0.0b3 → tskit-1.0.1}/lib/tskit/trees.c +333 -245
- {tskit-1.0.0b3 → tskit-1.0.1}/lib/tskit/trees.h +54 -5
- {tskit-1.0.0b3 → tskit-1.0.1}/pyproject.toml +7 -7
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_balance_metrics.py +1 -1
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_cli.py +1 -3
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_divmat.py +48 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_genotypes.py +615 -67
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_highlevel.py +74 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_immutable_table_collection.py +26 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_ld_matrix.py +28 -32
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_lowlevel.py +183 -1
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_metadata.py +29 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_phylo_formats.py +2 -6
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_relatedness_vector.py +88 -22
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_tables.py +248 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_topology.py +87 -14
- {tskit-1.0.0b3 → tskit-1.0.1}/tskit/_version.py +1 -1
- {tskit-1.0.0b3 → tskit-1.0.1}/tskit/drawing.py +2 -4
- {tskit-1.0.0b3 → tskit-1.0.1}/tskit/genotypes.py +23 -20
- {tskit-1.0.0b3 → tskit-1.0.1}/tskit/metadata.py +1 -1
- {tskit-1.0.0b3 → tskit-1.0.1}/tskit/tables.py +51 -26
- {tskit-1.0.0b3 → tskit-1.0.1}/tskit/text_formats.py +4 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tskit/trees.py +413 -245
- {tskit-1.0.0b3 → tskit-1.0.1}/tskit/util.py +6 -7
- {tskit-1.0.0b3 → tskit-1.0.1/tskit.egg-info}/PKG-INFO +8 -8
- {tskit-1.0.0b3 → tskit-1.0.1}/tskit.egg-info/requires.txt +5 -5
- {tskit-1.0.0b3 → tskit-1.0.1}/LICENSE +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/MANIFEST.in +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/README.rst +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/lib/subprojects/kastore/kastore.c +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/lib/subprojects/kastore/kastore.h +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/lib/tskit/convert.c +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/lib/tskit/convert.h +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/lib/tskit/genotypes.h +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/lib/tskit/haplotype_matching.c +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/lib/tskit/haplotype_matching.h +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/lib/tskit/stats.c +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/lib/tskit/stats.h +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/lib/tskit.h +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/lwt_interface/tskit_lwt_interface.h +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/setup.cfg +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/setup.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_avl_tree.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_coalrate.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_combinatorics.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_dict_encoding.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_distance_metrics.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_drawing.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_extend_haplotypes.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_file_format.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_fileobj.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_genotype_matching.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_haplotype_matching.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_ibd.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_intervals.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_jit.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_ms.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_parsimony.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_provenance.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_reference_sequence.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_stats.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_table_transforms.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_text_formats.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_threads.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_tree_positioning.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_tree_stats.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_util.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_utilities.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_vcf.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_version.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_wright_fisher.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tskit/__init__.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tskit/__main__.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tskit/cli.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tskit/combinatorics.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tskit/exceptions.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tskit/intervals.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tskit/jit/__init__.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tskit/jit/numba.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tskit/provenance.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tskit/provenance.schema.json +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tskit/stats.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tskit/vcf.py +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tskit.egg-info/SOURCES.txt +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tskit.egg-info/dependency_links.txt +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tskit.egg-info/entry_points.txt +0 -0
- {tskit-1.0.0b3 → tskit-1.0.1}/tskit.egg-info/top_level.txt +0 -0
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tskit
|
|
3
|
-
Version: 1.0.0b3
|
|
3
|
+
Version: 1.0.1
|
|
4
4
|
Summary: The tree sequence toolkit.
|
|
5
5
|
Author-email: Tskit Developers <admin@tskit.dev>
|
|
6
|
-
License: MIT
|
|
6
|
+
License-Expression: MIT
|
|
7
7
|
Project-URL: Homepage, https://tskit.dev/tskit
|
|
8
8
|
Project-URL: Documentation, https://tskit.dev/tskit/docs/stable
|
|
9
9
|
Project-URL: Changelog, https://tskit.dev/tskit/docs/stable/changelogs.html
|
|
@@ -17,11 +17,11 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
17
17
|
Classifier: Programming Language :: Python :: 3.11
|
|
18
18
|
Classifier: Programming Language :: Python :: 3.12
|
|
19
19
|
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
20
21
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
21
22
|
Classifier: Development Status :: 5 - Production/Stable
|
|
22
23
|
Classifier: Environment :: Other Environment
|
|
23
24
|
Classifier: Intended Audience :: Science/Research
|
|
24
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
25
25
|
Classifier: Operating System :: POSIX
|
|
26
26
|
Classifier: Operating System :: MacOS :: MacOS X
|
|
27
27
|
Classifier: Operating System :: Microsoft :: Windows
|
|
@@ -39,9 +39,9 @@ Requires-Dist: dendropy==5.0.1; extra == "test"
|
|
|
39
39
|
Requires-Dist: kastore==0.3.3; extra == "test"
|
|
40
40
|
Requires-Dist: lshmm==0.0.8; extra == "test"
|
|
41
41
|
Requires-Dist: msgpack==1.1.0; extra == "test"
|
|
42
|
-
Requires-Dist: msprime==1.
|
|
42
|
+
Requires-Dist: msprime==1.4.0b2; extra == "test"
|
|
43
43
|
Requires-Dist: networkx==3.2.1; extra == "test"
|
|
44
|
-
Requires-Dist: numba==0.
|
|
44
|
+
Requires-Dist: numba==0.63.1; extra == "test"
|
|
45
45
|
Requires-Dist: portion==2.6.0; extra == "test"
|
|
46
46
|
Requires-Dist: pytest==8.3.5; extra == "test"
|
|
47
47
|
Requires-Dist: pytest-cov==6.0.0; extra == "test"
|
|
@@ -57,8 +57,8 @@ Requires-Dist: breathe==4.35.0; extra == "docs"
|
|
|
57
57
|
Requires-Dist: sphinx-autodoc-typehints==2.3.0; extra == "docs"
|
|
58
58
|
Requires-Dist: sphinx-issues==5.0.0; extra == "docs"
|
|
59
59
|
Requires-Dist: sphinx-argparse==0.5.2; extra == "docs"
|
|
60
|
-
Requires-Dist: msprime==1.
|
|
61
|
-
Requires-Dist: numba==0.
|
|
60
|
+
Requires-Dist: msprime==1.4.0b2; extra == "docs"
|
|
61
|
+
Requires-Dist: numba==0.63.1; extra == "docs"
|
|
62
62
|
Requires-Dist: sphinx-book-theme; extra == "docs"
|
|
63
63
|
Requires-Dist: pandas==2.2.3; extra == "docs"
|
|
64
64
|
Provides-Extra: dev
|
|
@@ -84,7 +84,7 @@ Requires-Dist: tszip; extra == "dev"
|
|
|
84
84
|
Requires-Dist: xmlunittest; extra == "dev"
|
|
85
85
|
Requires-Dist: newick; extra == "dev"
|
|
86
86
|
Requires-Dist: zarr<3; extra == "dev"
|
|
87
|
-
Requires-Dist: jupyter-book; extra == "dev"
|
|
87
|
+
Requires-Dist: jupyter-book<2; extra == "dev"
|
|
88
88
|
Requires-Dist: breathe; extra == "dev"
|
|
89
89
|
Requires-Dist: sphinx-autodoc-typehints; extra == "dev"
|
|
90
90
|
Requires-Dist: sphinx-issues; extra == "dev"
|
|
@@ -4347,15 +4347,18 @@ TableCollection_union(TableCollection *self, PyObject *args, PyObject *kwds)
|
|
|
4347
4347
|
npy_intp *shape;
|
|
4348
4348
|
tsk_flags_t options = 0;
|
|
4349
4349
|
int check_shared = true;
|
|
4350
|
+
int all_edges = false;
|
|
4351
|
+
int all_mutations = false;
|
|
4350
4352
|
int add_populations = true;
|
|
4351
4353
|
static char *kwlist[] = { "other", "other_node_mapping", "check_shared_equality",
|
|
4352
|
-
"add_populations", NULL };
|
|
4354
|
+
"add_populations", "all_edges", "all_mutations", NULL };
|
|
4353
4355
|
|
|
4354
4356
|
if (TableCollection_check_state(self) != 0) {
|
|
4355
4357
|
goto out;
|
|
4356
4358
|
}
|
|
4357
|
-
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O|
|
|
4358
|
-
&other, &other_node_mapping, &check_shared,
|
|
4359
|
+
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O|iiii", kwlist,
|
|
4360
|
+
&TableCollectionType, &other, &other_node_mapping, &check_shared,
|
|
4361
|
+
&add_populations, &all_edges, &all_mutations)) {
|
|
4359
4362
|
goto out;
|
|
4360
4363
|
}
|
|
4361
4364
|
nmap_array = (PyArrayObject *) PyArray_FROMANY(
|
|
@@ -4370,6 +4373,12 @@ TableCollection_union(TableCollection *self, PyObject *args, PyObject *kwds)
|
|
|
4370
4373
|
" number of nodes in the other tree sequence.");
|
|
4371
4374
|
goto out;
|
|
4372
4375
|
}
|
|
4376
|
+
if (all_edges) {
|
|
4377
|
+
options |= TSK_UNION_ALL_EDGES;
|
|
4378
|
+
}
|
|
4379
|
+
if (all_mutations) {
|
|
4380
|
+
options |= TSK_UNION_ALL_MUTATIONS;
|
|
4381
|
+
}
|
|
4373
4382
|
if (!check_shared) {
|
|
4374
4383
|
options |= TSK_UNION_NO_CHECK_SHARED;
|
|
4375
4384
|
}
|
|
@@ -5335,6 +5344,69 @@ out:
|
|
|
5335
5344
|
return ret;
|
|
5336
5345
|
}
|
|
5337
5346
|
|
|
5347
|
+
static PyObject *
|
|
5348
|
+
TreeSequence_link_ancestors(TreeSequence *self, PyObject *args, PyObject *kwds)
|
|
5349
|
+
{
|
|
5350
|
+
int err;
|
|
5351
|
+
PyObject *ret = NULL;
|
|
5352
|
+
PyObject *samples = NULL;
|
|
5353
|
+
PyObject *ancestors = NULL;
|
|
5354
|
+
PyArrayObject *samples_array = NULL;
|
|
5355
|
+
PyArrayObject *ancestors_array = NULL;
|
|
5356
|
+
npy_intp *shape;
|
|
5357
|
+
tsk_size_t num_samples, num_ancestors;
|
|
5358
|
+
EdgeTable *result = NULL;
|
|
5359
|
+
PyObject *result_args = NULL;
|
|
5360
|
+
static char *kwlist[] = { "samples", "ancestors", NULL };
|
|
5361
|
+
|
|
5362
|
+
if (TreeSequence_check_state(self) != 0) {
|
|
5363
|
+
goto out;
|
|
5364
|
+
}
|
|
5365
|
+
if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO", kwlist, &samples, &ancestors)) {
|
|
5366
|
+
goto out;
|
|
5367
|
+
}
|
|
5368
|
+
|
|
5369
|
+
samples_array = (PyArrayObject *) PyArray_FROMANY(
|
|
5370
|
+
samples, NPY_INT32, 1, 1, NPY_ARRAY_IN_ARRAY);
|
|
5371
|
+
if (samples_array == NULL) {
|
|
5372
|
+
goto out;
|
|
5373
|
+
}
|
|
5374
|
+
shape = PyArray_DIMS(samples_array);
|
|
5375
|
+
num_samples = (tsk_size_t) shape[0];
|
|
5376
|
+
|
|
5377
|
+
ancestors_array = (PyArrayObject *) PyArray_FROMANY(
|
|
5378
|
+
ancestors, NPY_INT32, 1, 1, NPY_ARRAY_IN_ARRAY);
|
|
5379
|
+
if (ancestors_array == NULL) {
|
|
5380
|
+
goto out;
|
|
5381
|
+
}
|
|
5382
|
+
shape = PyArray_DIMS(ancestors_array);
|
|
5383
|
+
num_ancestors = (tsk_size_t) shape[0];
|
|
5384
|
+
|
|
5385
|
+
result_args = PyTuple_New(0);
|
|
5386
|
+
if (result_args == NULL) {
|
|
5387
|
+
goto out;
|
|
5388
|
+
}
|
|
5389
|
+
result = (EdgeTable *) PyObject_CallObject((PyObject *) &EdgeTableType, result_args);
|
|
5390
|
+
if (result == NULL) {
|
|
5391
|
+
goto out;
|
|
5392
|
+
}
|
|
5393
|
+
err = tsk_table_collection_link_ancestors(self->tree_sequence->tables,
|
|
5394
|
+
PyArray_DATA(samples_array), num_samples, PyArray_DATA(ancestors_array),
|
|
5395
|
+
num_ancestors, 0, result->table);
|
|
5396
|
+
if (err != 0) {
|
|
5397
|
+
handle_library_error(err);
|
|
5398
|
+
goto out;
|
|
5399
|
+
}
|
|
5400
|
+
ret = (PyObject *) result;
|
|
5401
|
+
result = NULL;
|
|
5402
|
+
out:
|
|
5403
|
+
Py_XDECREF(samples_array);
|
|
5404
|
+
Py_XDECREF(ancestors_array);
|
|
5405
|
+
Py_XDECREF(result);
|
|
5406
|
+
Py_XDECREF(result_args);
|
|
5407
|
+
return ret;
|
|
5408
|
+
}
|
|
5409
|
+
|
|
5338
5410
|
static PyObject *
|
|
5339
5411
|
TreeSequence_load(TreeSequence *self, PyObject *args, PyObject *kwds)
|
|
5340
5412
|
{
|
|
@@ -6070,6 +6142,102 @@ out:
|
|
|
6070
6142
|
return ret;
|
|
6071
6143
|
}
|
|
6072
6144
|
|
|
6145
|
+
static PyObject *
|
|
6146
|
+
TreeSequence_decode_alignments(TreeSequence *self, PyObject *args, PyObject *kwds)
|
|
6147
|
+
{
|
|
6148
|
+
int err;
|
|
6149
|
+
PyObject *ret = NULL;
|
|
6150
|
+
PyObject *py_ref, *py_nodes, *py_missing;
|
|
6151
|
+
PyArrayObject *nodes_array = NULL;
|
|
6152
|
+
const char *ref_seq;
|
|
6153
|
+
Py_ssize_t ref_len, missing_len;
|
|
6154
|
+
tsk_id_t *nodes;
|
|
6155
|
+
tsk_size_t num_nodes;
|
|
6156
|
+
double left, right;
|
|
6157
|
+
char missing_char;
|
|
6158
|
+
const char *missing_utf8;
|
|
6159
|
+
int isolated_as_missing = 1;
|
|
6160
|
+
tsk_flags_t options = 0;
|
|
6161
|
+
PyObject *buf_obj = NULL;
|
|
6162
|
+
char *buf = NULL;
|
|
6163
|
+
|
|
6164
|
+
static char *kwlist[] = { "reference_sequence", "nodes", "left", "right",
|
|
6165
|
+
"missing_data_character", "isolated_as_missing", NULL };
|
|
6166
|
+
|
|
6167
|
+
if (TreeSequence_check_state(self) != 0) {
|
|
6168
|
+
goto out;
|
|
6169
|
+
}
|
|
6170
|
+
|
|
6171
|
+
if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOddOp", kwlist, &py_ref, &py_nodes,
|
|
6172
|
+
&left, &right, &py_missing, &isolated_as_missing)) {
|
|
6173
|
+
goto out;
|
|
6174
|
+
}
|
|
6175
|
+
|
|
6176
|
+
if (!PyBytes_Check(py_ref)) {
|
|
6177
|
+
PyErr_SetString(PyExc_TypeError, "reference_sequence must be bytes");
|
|
6178
|
+
goto out;
|
|
6179
|
+
}
|
|
6180
|
+
if (PyBytes_AsStringAndSize(py_ref, (char **) &ref_seq, &ref_len) < 0) {
|
|
6181
|
+
goto out;
|
|
6182
|
+
}
|
|
6183
|
+
|
|
6184
|
+
if (!PyUnicode_Check(py_missing)) {
|
|
6185
|
+
PyErr_SetString(
|
|
6186
|
+
PyExc_TypeError, "missing_data_character must be a (length 1) string");
|
|
6187
|
+
goto out;
|
|
6188
|
+
}
|
|
6189
|
+
missing_utf8 = PyUnicode_AsUTF8AndSize(py_missing, &missing_len);
|
|
6190
|
+
if (missing_utf8 == NULL) {
|
|
6191
|
+
goto out;
|
|
6192
|
+
}
|
|
6193
|
+
if (missing_len != 1) {
|
|
6194
|
+
PyErr_SetString(
|
|
6195
|
+
PyExc_TypeError, "missing_data_character must be a single character");
|
|
6196
|
+
goto out;
|
|
6197
|
+
}
|
|
6198
|
+
missing_char = missing_utf8[0];
|
|
6199
|
+
|
|
6200
|
+
if (!isolated_as_missing) {
|
|
6201
|
+
options |= TSK_ISOLATED_NOT_MISSING;
|
|
6202
|
+
}
|
|
6203
|
+
|
|
6204
|
+
nodes_array = (PyArrayObject *) PyArray_FROMANY(
|
|
6205
|
+
py_nodes, NPY_INT32, 1, 1, NPY_ARRAY_IN_ARRAY);
|
|
6206
|
+
if (nodes_array == NULL) {
|
|
6207
|
+
goto out;
|
|
6208
|
+
}
|
|
6209
|
+
num_nodes = (tsk_size_t) PyArray_DIM(nodes_array, 0);
|
|
6210
|
+
nodes = PyArray_DATA(nodes_array);
|
|
6211
|
+
|
|
6212
|
+
buf_obj = PyBytes_FromStringAndSize(
|
|
6213
|
+
NULL, (Py_ssize_t)(num_nodes * (tsk_size_t)(right - left)));
|
|
6214
|
+
if (buf_obj == NULL) {
|
|
6215
|
+
goto out;
|
|
6216
|
+
}
|
|
6217
|
+
buf = PyBytes_AS_STRING(buf_obj);
|
|
6218
|
+
|
|
6219
|
+
// clang-format off
|
|
6220
|
+
Py_BEGIN_ALLOW_THREADS
|
|
6221
|
+
err = tsk_treeseq_decode_alignments(self->tree_sequence,
|
|
6222
|
+
ref_seq, (tsk_size_t) ref_len, nodes, num_nodes, left, right, missing_char, buf,
|
|
6223
|
+
options);
|
|
6224
|
+
Py_END_ALLOW_THREADS
|
|
6225
|
+
// clang-format on
|
|
6226
|
+
if (err != 0)
|
|
6227
|
+
{
|
|
6228
|
+
handle_library_error(err);
|
|
6229
|
+
goto out;
|
|
6230
|
+
}
|
|
6231
|
+
|
|
6232
|
+
ret = buf_obj;
|
|
6233
|
+
buf_obj = NULL;
|
|
6234
|
+
|
|
6235
|
+
out:
|
|
6236
|
+
Py_XDECREF(nodes_array);
|
|
6237
|
+
Py_XDECREF(buf_obj);
|
|
6238
|
+
return ret;
|
|
6239
|
+
}
|
|
6240
|
+
|
|
6073
6241
|
static PyObject *
|
|
6074
6242
|
TreeSequence_get_mutations_edge(TreeSequence *self)
|
|
6075
6243
|
{
|
|
@@ -8519,6 +8687,10 @@ static PyMethodDef TreeSequence_methods[] = {
|
|
|
8519
8687
|
.ml_meth = (PyCFunction) TreeSequence_dump_tables,
|
|
8520
8688
|
.ml_flags = METH_VARARGS | METH_KEYWORDS,
|
|
8521
8689
|
.ml_doc = "Dumps the tree sequence to the specified set of tables" },
|
|
8690
|
+
{ .ml_name = "link_ancestors",
|
|
8691
|
+
.ml_meth = (PyCFunction) TreeSequence_link_ancestors,
|
|
8692
|
+
.ml_flags = METH_VARARGS | METH_KEYWORDS,
|
|
8693
|
+
.ml_doc = "Returns an EdgeTable linking the specified samples and ancestors." },
|
|
8522
8694
|
{ .ml_name = "get_node",
|
|
8523
8695
|
.ml_meth = (PyCFunction) TreeSequence_get_node,
|
|
8524
8696
|
.ml_flags = METH_VARARGS,
|
|
@@ -8651,6 +8823,10 @@ static PyMethodDef TreeSequence_methods[] = {
|
|
|
8651
8823
|
.ml_meth = (PyCFunction) TreeSequence_get_individuals_nodes,
|
|
8652
8824
|
.ml_flags = METH_NOARGS,
|
|
8653
8825
|
.ml_doc = "Returns an array of the node ids for each individual" },
|
|
8826
|
+
{ .ml_name = "decode_alignments",
|
|
8827
|
+
.ml_meth = (PyCFunction) TreeSequence_decode_alignments,
|
|
8828
|
+
.ml_flags = METH_VARARGS | METH_KEYWORDS,
|
|
8829
|
+
.ml_doc = "Decode full alignments for given nodes and interval." },
|
|
8654
8830
|
{ .ml_name = "get_mutations_edge",
|
|
8655
8831
|
.ml_meth = (PyCFunction) TreeSequence_get_mutations_edge,
|
|
8656
8832
|
.ml_flags = METH_NOARGS,
|
|
@@ -584,6 +584,14 @@ tsk_strerror_internal(int err)
|
|
|
584
584
|
ret = "Must have at least one allele when specifying an allele map. "
|
|
585
585
|
"(TSK_ERR_ZERO_ALLELES)";
|
|
586
586
|
break;
|
|
587
|
+
case TSK_ERR_BAD_ALLELE_LENGTH:
|
|
588
|
+
ret = "Alleles used when decoding alignments must have length one. "
|
|
589
|
+
"(TSK_ERR_BAD_ALLELE_LENGTH)";
|
|
590
|
+
break;
|
|
591
|
+
case TSK_ERR_MISSING_CHAR_COLLISION:
|
|
592
|
+
ret = "Alleles used when decoding alignments must not match the missing "
|
|
593
|
+
"data character. (TSK_ERR_MISSING_CHAR_COLLISION)";
|
|
594
|
+
break;
|
|
587
595
|
|
|
588
596
|
/* Distance metric errors */
|
|
589
597
|
case TSK_ERR_SAMPLE_SIZE_MISMATCH:
|
|
@@ -1033,7 +1041,7 @@ FILE *
|
|
|
1033
1041
|
tsk_get_debug_stream(void)
|
|
1034
1042
|
{
|
|
1035
1043
|
if (_tsk_debug_stream == NULL) {
|
|
1036
|
-
_tsk_debug_stream =
|
|
1044
|
+
_tsk_debug_stream = TSK_DEFAULT_DEBUG_STREAM;
|
|
1037
1045
|
}
|
|
1038
1046
|
return _tsk_debug_stream;
|
|
1039
1047
|
}
|
|
@@ -1260,16 +1268,16 @@ tsk_avl_tree_int_ordered_nodes(const tsk_avl_tree_int_t *self, tsk_avl_node_int_
|
|
|
1260
1268
|
}
|
|
1261
1269
|
|
|
1262
1270
|
// Bit Array implementation. Allows us to store unsigned integers in a compact manner.
|
|
1263
|
-
// Currently implemented as an array of 32-bit unsigned integers
|
|
1271
|
+
// Currently implemented as an array of 32-bit unsigned integers.
|
|
1264
1272
|
|
|
1265
1273
|
int
|
|
1266
|
-
|
|
1274
|
+
tsk_bitset_init(tsk_bitset_t *self, tsk_size_t num_bits, tsk_size_t length)
|
|
1267
1275
|
{
|
|
1268
1276
|
int ret = 0;
|
|
1269
1277
|
|
|
1270
|
-
self->
|
|
1271
|
-
|
|
1272
|
-
self->data = tsk_calloc(self->
|
|
1278
|
+
self->row_len = (num_bits / TSK_BITSET_BITS) + (num_bits % TSK_BITSET_BITS ? 1 : 0);
|
|
1279
|
+
self->len = length;
|
|
1280
|
+
self->data = tsk_calloc(self->row_len * length, sizeof(*self->data));
|
|
1273
1281
|
if (self->data == NULL) {
|
|
1274
1282
|
ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
|
|
1275
1283
|
goto out;
|
|
@@ -1278,96 +1286,111 @@ out:
|
|
|
1278
1286
|
return ret;
|
|
1279
1287
|
}
|
|
1280
1288
|
|
|
1281
|
-
|
|
1282
|
-
tsk_bit_array_get_row(const tsk_bit_array_t *self, tsk_size_t row, tsk_bit_array_t *out)
|
|
1283
|
-
{
|
|
1284
|
-
out->size = self->size;
|
|
1285
|
-
out->data = self->data + (row * self->size);
|
|
1286
|
-
}
|
|
1289
|
+
#define BITSET_DATA_ROW(bs, row) ((bs)->data + (row) * (bs)->row_len)
|
|
1287
1290
|
|
|
1288
1291
|
void
|
|
1289
|
-
|
|
1290
|
-
const
|
|
1292
|
+
tsk_bitset_intersect(const tsk_bitset_t *self, tsk_size_t self_row,
|
|
1293
|
+
const tsk_bitset_t *other, tsk_size_t other_row, tsk_bitset_t *out)
|
|
1291
1294
|
{
|
|
1292
|
-
|
|
1293
|
-
|
|
1295
|
+
const tsk_bitset_val_t *restrict self_d = BITSET_DATA_ROW(self, self_row);
|
|
1296
|
+
const tsk_bitset_val_t *restrict other_d = BITSET_DATA_ROW(other, other_row);
|
|
1297
|
+
tsk_bitset_val_t *restrict out_d = out->data;
|
|
1298
|
+
for (tsk_size_t i = 0; i < self->row_len; i++) {
|
|
1299
|
+
out_d[i] = self_d[i] & other_d[i];
|
|
1294
1300
|
}
|
|
1295
1301
|
}
|
|
1296
1302
|
|
|
1297
1303
|
void
|
|
1298
|
-
|
|
1304
|
+
tsk_bitset_subtract(tsk_bitset_t *self, tsk_size_t self_row, const tsk_bitset_t *other,
|
|
1305
|
+
tsk_size_t other_row)
|
|
1299
1306
|
{
|
|
1300
|
-
|
|
1301
|
-
|
|
1307
|
+
tsk_bitset_val_t *restrict self_d = BITSET_DATA_ROW(self, self_row);
|
|
1308
|
+
const tsk_bitset_val_t *restrict other_d = BITSET_DATA_ROW(other, other_row);
|
|
1309
|
+
for (tsk_size_t i = 0; i < self->row_len; i++) {
|
|
1310
|
+
self_d[i] &= ~(other_d[i]);
|
|
1302
1311
|
}
|
|
1303
1312
|
}
|
|
1304
1313
|
|
|
1305
1314
|
void
|
|
1306
|
-
|
|
1315
|
+
tsk_bitset_union(tsk_bitset_t *self, tsk_size_t self_row, const tsk_bitset_t *other,
|
|
1316
|
+
tsk_size_t other_row)
|
|
1307
1317
|
{
|
|
1308
|
-
|
|
1309
|
-
|
|
1318
|
+
tsk_bitset_val_t *restrict self_d = BITSET_DATA_ROW(self, self_row);
|
|
1319
|
+
const tsk_bitset_val_t *restrict other_d = BITSET_DATA_ROW(other, other_row);
|
|
1320
|
+
for (tsk_size_t i = 0; i < self->row_len; i++) {
|
|
1321
|
+
self_d[i] |= other_d[i];
|
|
1310
1322
|
}
|
|
1311
1323
|
}
|
|
1312
1324
|
|
|
1313
1325
|
void
|
|
1314
|
-
|
|
1326
|
+
tsk_bitset_set_bit(tsk_bitset_t *self, tsk_size_t row, const tsk_bitset_val_t bit)
|
|
1315
1327
|
{
|
|
1316
|
-
|
|
1317
|
-
self
|
|
1328
|
+
tsk_bitset_val_t i = (bit / TSK_BITSET_BITS);
|
|
1329
|
+
*(BITSET_DATA_ROW(self, row) + i) |= (tsk_bitset_val_t) 1
|
|
1330
|
+
<< (bit - (TSK_BITSET_BITS * i));
|
|
1318
1331
|
}
|
|
1319
1332
|
|
|
1320
1333
|
bool
|
|
1321
|
-
|
|
1334
|
+
tsk_bitset_contains(const tsk_bitset_t *self, tsk_size_t row, const tsk_bitset_val_t bit)
|
|
1322
1335
|
{
|
|
1323
|
-
|
|
1324
|
-
return self
|
|
1325
|
-
& ((
|
|
1336
|
+
tsk_bitset_val_t i = (bit / TSK_BITSET_BITS);
|
|
1337
|
+
return *(BITSET_DATA_ROW(self, row) + i)
|
|
1338
|
+
& ((tsk_bitset_val_t) 1 << (bit - (TSK_BITSET_BITS * i)));
|
|
1326
1339
|
}
|
|
1327
1340
|
|
|
1328
|
-
|
|
1329
|
-
|
|
1341
|
+
static inline uint32_t
|
|
1342
|
+
popcount(tsk_bitset_val_t v)
|
|
1330
1343
|
{
|
|
1331
|
-
// Utilizes 12 operations per
|
|
1344
|
+
// Utilizes 12 operations per chunk. NB this only works on 32 bit integers.
|
|
1332
1345
|
// Taken from:
|
|
1333
1346
|
// https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
|
|
1334
1347
|
// There's a nice breakdown of this algorithm here:
|
|
1335
1348
|
// https://stackoverflow.com/a/109025
|
|
1336
|
-
// Could probably do better with explicit SIMD (instead of SWAR), but not as
|
|
1337
|
-
// portable: https://arxiv.org/pdf/1611.07612.pdf
|
|
1338
1349
|
//
|
|
1339
|
-
//
|
|
1340
|
-
//
|
|
1341
|
-
//
|
|
1350
|
+
// The gcc/clang compiler flag -mpopcnt will convert this code to a
|
|
1351
|
+
// popcnt instruction (most if not all modern CPUs will support this). The
|
|
1352
|
+
// popcnt instruction will yield some speed improvements, which depend on
|
|
1353
|
+
// the tree sequence.
|
|
1354
|
+
//
|
|
1355
|
+
// NB: 32bit counting is typically faster than 64bit counting for this task.
|
|
1356
|
+
// (at least on x86-64)
|
|
1357
|
+
|
|
1358
|
+
v = v - ((v >> 1) & 0x55555555);
|
|
1359
|
+
v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
|
|
1360
|
+
return (((v + (v >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
|
|
1361
|
+
}
|
|
1342
1362
|
|
|
1343
|
-
|
|
1344
|
-
|
|
1363
|
+
tsk_size_t
|
|
1364
|
+
tsk_bitset_count(const tsk_bitset_t *self, tsk_size_t row)
|
|
1365
|
+
{
|
|
1366
|
+
tsk_size_t i = 0;
|
|
1367
|
+
tsk_size_t count = 0;
|
|
1368
|
+
const tsk_bitset_val_t *restrict self_d = BITSET_DATA_ROW(self, row);
|
|
1345
1369
|
|
|
1346
|
-
for (i = 0; i < self->
|
|
1347
|
-
|
|
1348
|
-
tmp = (tmp & 0x33333333) + ((tmp >> 2) & 0x33333333);
|
|
1349
|
-
count += (((tmp + (tmp >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
|
|
1370
|
+
for (i = 0; i < self->row_len; i++) {
|
|
1371
|
+
count += popcount(self_d[i]);
|
|
1350
1372
|
}
|
|
1351
1373
|
return count;
|
|
1352
1374
|
}
|
|
1353
1375
|
|
|
1354
1376
|
void
|
|
1355
|
-
|
|
1356
|
-
const
|
|
1377
|
+
tsk_bitset_get_items(
|
|
1378
|
+
const tsk_bitset_t *self, tsk_size_t row, tsk_id_t *items, tsk_size_t *n_items)
|
|
1357
1379
|
{
|
|
1358
1380
|
// Get the items stored in the row of a bitset.
|
|
1359
|
-
// Uses a de Bruijn sequence lookup table to determine the lowest bit set.
|
|
1360
|
-
// wikipedia article for more info: https://w.wiki/BYiF
|
|
1381
|
+
// Uses a de Bruijn sequence lookup table to determine the lowest bit set.
|
|
1382
|
+
// See the wikipedia article for more info: https://w.wiki/BYiF
|
|
1361
1383
|
|
|
1362
1384
|
tsk_size_t i, n, off;
|
|
1363
|
-
|
|
1385
|
+
tsk_bitset_val_t v, lsb; // least significant bit
|
|
1364
1386
|
static const tsk_id_t lookup[32] = { 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25,
|
|
1365
1387
|
17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 };
|
|
1388
|
+
const tsk_bitset_val_t *restrict self_d = BITSET_DATA_ROW(self, row);
|
|
1366
1389
|
|
|
1367
1390
|
n = 0;
|
|
1368
|
-
for (i = 0; i < self->
|
|
1369
|
-
v =
|
|
1370
|
-
off = i *
|
|
1391
|
+
for (i = 0; i < self->row_len; i++) {
|
|
1392
|
+
v = self_d[i];
|
|
1393
|
+
off = i * TSK_BITSET_BITS;
|
|
1371
1394
|
if (v == 0) {
|
|
1372
1395
|
continue;
|
|
1373
1396
|
}
|
|
@@ -1381,7 +1404,7 @@ tsk_bit_array_get_items(
|
|
|
1381
1404
|
}
|
|
1382
1405
|
|
|
1383
1406
|
void
|
|
1384
|
-
|
|
1407
|
+
tsk_bitset_free(tsk_bitset_t *self)
|
|
1385
1408
|
{
|
|
1386
1409
|
tsk_safe_free(self->data);
|
|
1387
1410
|
}
|
|
@@ -147,7 +147,7 @@ sizes and types of externally visible structs.
|
|
|
147
147
|
The library minor version. Incremented when non-breaking backward-compatible changes
|
|
148
148
|
to the API or ABI are introduced, i.e., the addition of a new function.
|
|
149
149
|
*/
|
|
150
|
-
#define TSK_VERSION_MINOR
|
|
150
|
+
#define TSK_VERSION_MINOR 3
|
|
151
151
|
/**
|
|
152
152
|
The library patch version. Incremented when any changes not relevant to the
|
|
153
153
|
API or ABI are introduced, i.e., internal refactors or bugfixes.
|
|
@@ -511,7 +511,7 @@ disallowed (use compute_mutation_times?).
|
|
|
511
511
|
*/
|
|
512
512
|
#define TSK_ERR_DISALLOWED_UNKNOWN_MUTATION_TIME -510
|
|
513
513
|
|
|
514
|
-
/**
|
|
514
|
+
/**
|
|
515
515
|
A mutation's parent was not consistent with the topology of the tree.
|
|
516
516
|
*/
|
|
517
517
|
#define TSK_ERR_BAD_MUTATION_PARENT -511
|
|
@@ -803,6 +803,14 @@ More than 2147483647 alleles were specified.
|
|
|
803
803
|
A user-specified allele map was used, but it contained zero alleles.
|
|
804
804
|
*/
|
|
805
805
|
#define TSK_ERR_ZERO_ALLELES -1103
|
|
806
|
+
/**
|
|
807
|
+
An allele used when decoding alignments had length other than one.
|
|
808
|
+
*/
|
|
809
|
+
#define TSK_ERR_BAD_ALLELE_LENGTH -1104
|
|
810
|
+
/**
|
|
811
|
+
An allele used when decoding alignments matched the missing data character.
|
|
812
|
+
*/
|
|
813
|
+
#define TSK_ERR_MISSING_CHAR_COLLISION -1105
|
|
806
814
|
/** @} */
|
|
807
815
|
|
|
808
816
|
/**
|
|
@@ -963,6 +971,12 @@ not be freed by client code.
|
|
|
963
971
|
*/
|
|
964
972
|
const char *tsk_strerror(int err);
|
|
965
973
|
|
|
974
|
+
/* Redefine this macro in downstream builds if stdout is not the
|
|
975
|
+
* appropriate stream to emit debug information when the TSK_DEBUG
|
|
976
|
+
* flag is passed to supporting functions (e.g. in R).
|
|
977
|
+
*/
|
|
978
|
+
#define TSK_DEFAULT_DEBUG_STREAM stdout
|
|
979
|
+
|
|
966
980
|
#ifdef TSK_TRACE_ERRORS
|
|
967
981
|
|
|
968
982
|
static inline int
|
|
@@ -973,6 +987,11 @@ _tsk_trace_error(int err, int line, const char *file)
|
|
|
973
987
|
return err;
|
|
974
988
|
}
|
|
975
989
|
|
|
990
|
+
/*
|
|
991
|
+
Developer note: this macro may be redefined as part of compilation for
|
|
992
|
+
an R package, and should be treated as part of the documented API
|
|
993
|
+
(no changes).
|
|
994
|
+
*/
|
|
976
995
|
#define tsk_trace_error(err) (_tsk_trace_error(err, __LINE__, __FILE__))
|
|
977
996
|
#else
|
|
978
997
|
#define tsk_trace_error(err) (err)
|
|
@@ -993,6 +1012,11 @@ means compiling without NDEBUG. This macro still asserts when NDEBUG is defined.
|
|
|
993
1012
|
If you are using this macro in your own software then please set TSK_BUG_ASSERT_MESSAGE
|
|
994
1013
|
to point users to your issue tracker.
|
|
995
1014
|
*/
|
|
1015
|
+
/*
|
|
1016
|
+
Developer note: this macro may be redefined as part of compilation for
|
|
1017
|
+
an R package, and should be treated as part of the documented API
|
|
1018
|
+
(no changes).
|
|
1019
|
+
*/
|
|
996
1020
|
#define tsk_bug_assert(condition) \
|
|
997
1021
|
do { \
|
|
998
1022
|
if (!(condition)) { \
|
|
@@ -1104,29 +1128,31 @@ FILE *tsk_get_debug_stream(void);
|
|
|
1104
1128
|
|
|
1105
1129
|
/* Bit Array functionality */
|
|
1106
1130
|
|
|
1107
|
-
|
|
1131
|
+
// define a 32-bit chunk size for our bitsets.
|
|
1132
|
+
// this means we'll be able to hold 32 distinct items in each 32 bit uint
|
|
1133
|
+
#define TSK_BITSET_BITS ((tsk_size_t) 32)
|
|
1134
|
+
typedef uint32_t tsk_bitset_val_t;
|
|
1135
|
+
|
|
1108
1136
|
typedef struct {
|
|
1109
|
-
tsk_size_t
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
void
|
|
1119
|
-
const
|
|
1120
|
-
void
|
|
1121
|
-
|
|
1122
|
-
void
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
tsk_size_t
|
|
1128
|
-
void tsk_bit_array_get_items(
|
|
1129
|
-
const tsk_bit_array_t *self, tsk_id_t *items, tsk_size_t *n_items);
|
|
1137
|
+
tsk_size_t row_len; // Number of size TSK_BITSET_BITS chunks per row
|
|
1138
|
+
tsk_size_t len; // Number of rows
|
|
1139
|
+
tsk_bitset_val_t *data;
|
|
1140
|
+
} tsk_bitset_t;
|
|
1141
|
+
|
|
1142
|
+
int tsk_bitset_init(tsk_bitset_t *self, tsk_size_t num_bits, tsk_size_t length);
|
|
1143
|
+
void tsk_bitset_free(tsk_bitset_t *self);
|
|
1144
|
+
void tsk_bitset_intersect(const tsk_bitset_t *self, tsk_size_t self_row,
|
|
1145
|
+
const tsk_bitset_t *other, tsk_size_t other_row, tsk_bitset_t *out);
|
|
1146
|
+
void tsk_bitset_subtract(tsk_bitset_t *self, tsk_size_t self_row,
|
|
1147
|
+
const tsk_bitset_t *other, tsk_size_t other_row);
|
|
1148
|
+
void tsk_bitset_union(tsk_bitset_t *self, tsk_size_t self_row, const tsk_bitset_t *other,
|
|
1149
|
+
tsk_size_t other_row);
|
|
1150
|
+
void tsk_bitset_set_bit(tsk_bitset_t *self, tsk_size_t row, const tsk_bitset_val_t bit);
|
|
1151
|
+
bool tsk_bitset_contains(
|
|
1152
|
+
const tsk_bitset_t *self, tsk_size_t row, const tsk_bitset_val_t bit);
|
|
1153
|
+
tsk_size_t tsk_bitset_count(const tsk_bitset_t *self, tsk_size_t row);
|
|
1154
|
+
void tsk_bitset_get_items(
|
|
1155
|
+
const tsk_bitset_t *self, tsk_size_t row, tsk_id_t *items, tsk_size_t *n_items);
|
|
1130
1156
|
|
|
1131
1157
|
#ifdef __cplusplus
|
|
1132
1158
|
}
|