tskit 1.0.0b3__tar.gz → 1.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. {tskit-1.0.0b3/tskit.egg-info → tskit-1.0.1}/PKG-INFO +8 -8
  2. {tskit-1.0.0b3 → tskit-1.0.1}/_tskitmodule.c +179 -3
  3. {tskit-1.0.0b3 → tskit-1.0.1}/lib/tskit/core.c +75 -52
  4. {tskit-1.0.0b3 → tskit-1.0.1}/lib/tskit/core.h +50 -24
  5. {tskit-1.0.0b3 → tskit-1.0.1}/lib/tskit/genotypes.c +231 -8
  6. {tskit-1.0.0b3 → tskit-1.0.1}/lib/tskit/tables.c +47 -4
  7. {tskit-1.0.0b3 → tskit-1.0.1}/lib/tskit/tables.h +15 -1
  8. {tskit-1.0.0b3 → tskit-1.0.1}/lib/tskit/trees.c +333 -245
  9. {tskit-1.0.0b3 → tskit-1.0.1}/lib/tskit/trees.h +54 -5
  10. {tskit-1.0.0b3 → tskit-1.0.1}/pyproject.toml +7 -7
  11. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_balance_metrics.py +1 -1
  12. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_cli.py +1 -3
  13. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_divmat.py +48 -0
  14. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_genotypes.py +615 -67
  15. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_highlevel.py +74 -0
  16. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_immutable_table_collection.py +26 -0
  17. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_ld_matrix.py +28 -32
  18. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_lowlevel.py +183 -1
  19. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_metadata.py +29 -0
  20. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_phylo_formats.py +2 -6
  21. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_relatedness_vector.py +88 -22
  22. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_tables.py +248 -0
  23. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_topology.py +87 -14
  24. {tskit-1.0.0b3 → tskit-1.0.1}/tskit/_version.py +1 -1
  25. {tskit-1.0.0b3 → tskit-1.0.1}/tskit/drawing.py +2 -4
  26. {tskit-1.0.0b3 → tskit-1.0.1}/tskit/genotypes.py +23 -20
  27. {tskit-1.0.0b3 → tskit-1.0.1}/tskit/metadata.py +1 -1
  28. {tskit-1.0.0b3 → tskit-1.0.1}/tskit/tables.py +51 -26
  29. {tskit-1.0.0b3 → tskit-1.0.1}/tskit/text_formats.py +4 -0
  30. {tskit-1.0.0b3 → tskit-1.0.1}/tskit/trees.py +413 -245
  31. {tskit-1.0.0b3 → tskit-1.0.1}/tskit/util.py +6 -7
  32. {tskit-1.0.0b3 → tskit-1.0.1/tskit.egg-info}/PKG-INFO +8 -8
  33. {tskit-1.0.0b3 → tskit-1.0.1}/tskit.egg-info/requires.txt +5 -5
  34. {tskit-1.0.0b3 → tskit-1.0.1}/LICENSE +0 -0
  35. {tskit-1.0.0b3 → tskit-1.0.1}/MANIFEST.in +0 -0
  36. {tskit-1.0.0b3 → tskit-1.0.1}/README.rst +0 -0
  37. {tskit-1.0.0b3 → tskit-1.0.1}/lib/subprojects/kastore/kastore.c +0 -0
  38. {tskit-1.0.0b3 → tskit-1.0.1}/lib/subprojects/kastore/kastore.h +0 -0
  39. {tskit-1.0.0b3 → tskit-1.0.1}/lib/tskit/convert.c +0 -0
  40. {tskit-1.0.0b3 → tskit-1.0.1}/lib/tskit/convert.h +0 -0
  41. {tskit-1.0.0b3 → tskit-1.0.1}/lib/tskit/genotypes.h +0 -0
  42. {tskit-1.0.0b3 → tskit-1.0.1}/lib/tskit/haplotype_matching.c +0 -0
  43. {tskit-1.0.0b3 → tskit-1.0.1}/lib/tskit/haplotype_matching.h +0 -0
  44. {tskit-1.0.0b3 → tskit-1.0.1}/lib/tskit/stats.c +0 -0
  45. {tskit-1.0.0b3 → tskit-1.0.1}/lib/tskit/stats.h +0 -0
  46. {tskit-1.0.0b3 → tskit-1.0.1}/lib/tskit.h +0 -0
  47. {tskit-1.0.0b3 → tskit-1.0.1}/lwt_interface/tskit_lwt_interface.h +0 -0
  48. {tskit-1.0.0b3 → tskit-1.0.1}/setup.cfg +0 -0
  49. {tskit-1.0.0b3 → tskit-1.0.1}/setup.py +0 -0
  50. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_avl_tree.py +0 -0
  51. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_coalrate.py +0 -0
  52. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_combinatorics.py +0 -0
  53. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_dict_encoding.py +0 -0
  54. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_distance_metrics.py +0 -0
  55. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_drawing.py +0 -0
  56. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_extend_haplotypes.py +0 -0
  57. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_file_format.py +0 -0
  58. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_fileobj.py +0 -0
  59. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_genotype_matching.py +0 -0
  60. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_haplotype_matching.py +0 -0
  61. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_ibd.py +0 -0
  62. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_intervals.py +0 -0
  63. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_jit.py +0 -0
  64. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_ms.py +0 -0
  65. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_parsimony.py +0 -0
  66. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_provenance.py +0 -0
  67. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_reference_sequence.py +0 -0
  68. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_stats.py +0 -0
  69. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_table_transforms.py +0 -0
  70. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_text_formats.py +0 -0
  71. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_threads.py +0 -0
  72. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_tree_positioning.py +0 -0
  73. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_tree_stats.py +0 -0
  74. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_util.py +0 -0
  75. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_utilities.py +0 -0
  76. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_vcf.py +0 -0
  77. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_version.py +0 -0
  78. {tskit-1.0.0b3 → tskit-1.0.1}/tests/test_wright_fisher.py +0 -0
  79. {tskit-1.0.0b3 → tskit-1.0.1}/tskit/__init__.py +0 -0
  80. {tskit-1.0.0b3 → tskit-1.0.1}/tskit/__main__.py +0 -0
  81. {tskit-1.0.0b3 → tskit-1.0.1}/tskit/cli.py +0 -0
  82. {tskit-1.0.0b3 → tskit-1.0.1}/tskit/combinatorics.py +0 -0
  83. {tskit-1.0.0b3 → tskit-1.0.1}/tskit/exceptions.py +0 -0
  84. {tskit-1.0.0b3 → tskit-1.0.1}/tskit/intervals.py +0 -0
  85. {tskit-1.0.0b3 → tskit-1.0.1}/tskit/jit/__init__.py +0 -0
  86. {tskit-1.0.0b3 → tskit-1.0.1}/tskit/jit/numba.py +0 -0
  87. {tskit-1.0.0b3 → tskit-1.0.1}/tskit/provenance.py +0 -0
  88. {tskit-1.0.0b3 → tskit-1.0.1}/tskit/provenance.schema.json +0 -0
  89. {tskit-1.0.0b3 → tskit-1.0.1}/tskit/stats.py +0 -0
  90. {tskit-1.0.0b3 → tskit-1.0.1}/tskit/vcf.py +0 -0
  91. {tskit-1.0.0b3 → tskit-1.0.1}/tskit.egg-info/SOURCES.txt +0 -0
  92. {tskit-1.0.0b3 → tskit-1.0.1}/tskit.egg-info/dependency_links.txt +0 -0
  93. {tskit-1.0.0b3 → tskit-1.0.1}/tskit.egg-info/entry_points.txt +0 -0
  94. {tskit-1.0.0b3 → tskit-1.0.1}/tskit.egg-info/top_level.txt +0 -0
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tskit
3
- Version: 1.0.0b3
3
+ Version: 1.0.1
4
4
  Summary: The tree sequence toolkit.
5
5
  Author-email: Tskit Developers <admin@tskit.dev>
6
- License: MIT
6
+ License-Expression: MIT
7
7
  Project-URL: Homepage, https://tskit.dev/tskit
8
8
  Project-URL: Documentation, https://tskit.dev/tskit/docs/stable
9
9
  Project-URL: Changelog, https://tskit.dev/tskit/docs/stable/changelogs.html
@@ -17,11 +17,11 @@ Classifier: Programming Language :: Python :: 3.10
17
17
  Classifier: Programming Language :: Python :: 3.11
18
18
  Classifier: Programming Language :: Python :: 3.12
19
19
  Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Programming Language :: Python :: 3.14
20
21
  Classifier: Programming Language :: Python :: 3 :: Only
21
22
  Classifier: Development Status :: 5 - Production/Stable
22
23
  Classifier: Environment :: Other Environment
23
24
  Classifier: Intended Audience :: Science/Research
24
- Classifier: License :: OSI Approved :: MIT License
25
25
  Classifier: Operating System :: POSIX
26
26
  Classifier: Operating System :: MacOS :: MacOS X
27
27
  Classifier: Operating System :: Microsoft :: Windows
@@ -39,9 +39,9 @@ Requires-Dist: dendropy==5.0.1; extra == "test"
39
39
  Requires-Dist: kastore==0.3.3; extra == "test"
40
40
  Requires-Dist: lshmm==0.0.8; extra == "test"
41
41
  Requires-Dist: msgpack==1.1.0; extra == "test"
42
- Requires-Dist: msprime==1.3.4; extra == "test"
42
+ Requires-Dist: msprime==1.4.0b2; extra == "test"
43
43
  Requires-Dist: networkx==3.2.1; extra == "test"
44
- Requires-Dist: numba==0.61.2; extra == "test"
44
+ Requires-Dist: numba==0.63.1; extra == "test"
45
45
  Requires-Dist: portion==2.6.0; extra == "test"
46
46
  Requires-Dist: pytest==8.3.5; extra == "test"
47
47
  Requires-Dist: pytest-cov==6.0.0; extra == "test"
@@ -57,8 +57,8 @@ Requires-Dist: breathe==4.35.0; extra == "docs"
57
57
  Requires-Dist: sphinx-autodoc-typehints==2.3.0; extra == "docs"
58
58
  Requires-Dist: sphinx-issues==5.0.0; extra == "docs"
59
59
  Requires-Dist: sphinx-argparse==0.5.2; extra == "docs"
60
- Requires-Dist: msprime==1.3.3; extra == "docs"
61
- Requires-Dist: numba==0.61.2; extra == "docs"
60
+ Requires-Dist: msprime==1.4.0b2; extra == "docs"
61
+ Requires-Dist: numba==0.63.1; extra == "docs"
62
62
  Requires-Dist: sphinx-book-theme; extra == "docs"
63
63
  Requires-Dist: pandas==2.2.3; extra == "docs"
64
64
  Provides-Extra: dev
@@ -84,7 +84,7 @@ Requires-Dist: tszip; extra == "dev"
84
84
  Requires-Dist: xmlunittest; extra == "dev"
85
85
  Requires-Dist: newick; extra == "dev"
86
86
  Requires-Dist: zarr<3; extra == "dev"
87
- Requires-Dist: jupyter-book; extra == "dev"
87
+ Requires-Dist: jupyter-book<2; extra == "dev"
88
88
  Requires-Dist: breathe; extra == "dev"
89
89
  Requires-Dist: sphinx-autodoc-typehints; extra == "dev"
90
90
  Requires-Dist: sphinx-issues; extra == "dev"
@@ -4347,15 +4347,18 @@ TableCollection_union(TableCollection *self, PyObject *args, PyObject *kwds)
4347
4347
  npy_intp *shape;
4348
4348
  tsk_flags_t options = 0;
4349
4349
  int check_shared = true;
4350
+ int all_edges = false;
4351
+ int all_mutations = false;
4350
4352
  int add_populations = true;
4351
4353
  static char *kwlist[] = { "other", "other_node_mapping", "check_shared_equality",
4352
- "add_populations", NULL };
4354
+ "add_populations", "all_edges", "all_mutations", NULL };
4353
4355
 
4354
4356
  if (TableCollection_check_state(self) != 0) {
4355
4357
  goto out;
4356
4358
  }
4357
- if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O|ii", kwlist, &TableCollectionType,
4358
- &other, &other_node_mapping, &check_shared, &add_populations)) {
4359
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O|iiii", kwlist,
4360
+ &TableCollectionType, &other, &other_node_mapping, &check_shared,
4361
+ &add_populations, &all_edges, &all_mutations)) {
4359
4362
  goto out;
4360
4363
  }
4361
4364
  nmap_array = (PyArrayObject *) PyArray_FROMANY(
@@ -4370,6 +4373,12 @@ TableCollection_union(TableCollection *self, PyObject *args, PyObject *kwds)
4370
4373
  " number of nodes in the other tree sequence.");
4371
4374
  goto out;
4372
4375
  }
4376
+ if (all_edges) {
4377
+ options |= TSK_UNION_ALL_EDGES;
4378
+ }
4379
+ if (all_mutations) {
4380
+ options |= TSK_UNION_ALL_MUTATIONS;
4381
+ }
4373
4382
  if (!check_shared) {
4374
4383
  options |= TSK_UNION_NO_CHECK_SHARED;
4375
4384
  }
@@ -5335,6 +5344,69 @@ out:
5335
5344
  return ret;
5336
5345
  }
5337
5346
 
5347
+ static PyObject *
5348
+ TreeSequence_link_ancestors(TreeSequence *self, PyObject *args, PyObject *kwds)
5349
+ {
5350
+ int err;
5351
+ PyObject *ret = NULL;
5352
+ PyObject *samples = NULL;
5353
+ PyObject *ancestors = NULL;
5354
+ PyArrayObject *samples_array = NULL;
5355
+ PyArrayObject *ancestors_array = NULL;
5356
+ npy_intp *shape;
5357
+ tsk_size_t num_samples, num_ancestors;
5358
+ EdgeTable *result = NULL;
5359
+ PyObject *result_args = NULL;
5360
+ static char *kwlist[] = { "samples", "ancestors", NULL };
5361
+
5362
+ if (TreeSequence_check_state(self) != 0) {
5363
+ goto out;
5364
+ }
5365
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO", kwlist, &samples, &ancestors)) {
5366
+ goto out;
5367
+ }
5368
+
5369
+ samples_array = (PyArrayObject *) PyArray_FROMANY(
5370
+ samples, NPY_INT32, 1, 1, NPY_ARRAY_IN_ARRAY);
5371
+ if (samples_array == NULL) {
5372
+ goto out;
5373
+ }
5374
+ shape = PyArray_DIMS(samples_array);
5375
+ num_samples = (tsk_size_t) shape[0];
5376
+
5377
+ ancestors_array = (PyArrayObject *) PyArray_FROMANY(
5378
+ ancestors, NPY_INT32, 1, 1, NPY_ARRAY_IN_ARRAY);
5379
+ if (ancestors_array == NULL) {
5380
+ goto out;
5381
+ }
5382
+ shape = PyArray_DIMS(ancestors_array);
5383
+ num_ancestors = (tsk_size_t) shape[0];
5384
+
5385
+ result_args = PyTuple_New(0);
5386
+ if (result_args == NULL) {
5387
+ goto out;
5388
+ }
5389
+ result = (EdgeTable *) PyObject_CallObject((PyObject *) &EdgeTableType, result_args);
5390
+ if (result == NULL) {
5391
+ goto out;
5392
+ }
5393
+ err = tsk_table_collection_link_ancestors(self->tree_sequence->tables,
5394
+ PyArray_DATA(samples_array), num_samples, PyArray_DATA(ancestors_array),
5395
+ num_ancestors, 0, result->table);
5396
+ if (err != 0) {
5397
+ handle_library_error(err);
5398
+ goto out;
5399
+ }
5400
+ ret = (PyObject *) result;
5401
+ result = NULL;
5402
+ out:
5403
+ Py_XDECREF(samples_array);
5404
+ Py_XDECREF(ancestors_array);
5405
+ Py_XDECREF(result);
5406
+ Py_XDECREF(result_args);
5407
+ return ret;
5408
+ }
5409
+
5338
5410
  static PyObject *
5339
5411
  TreeSequence_load(TreeSequence *self, PyObject *args, PyObject *kwds)
5340
5412
  {
@@ -6070,6 +6142,102 @@ out:
6070
6142
  return ret;
6071
6143
  }
6072
6144
 
6145
+ static PyObject *
6146
+ TreeSequence_decode_alignments(TreeSequence *self, PyObject *args, PyObject *kwds)
6147
+ {
6148
+ int err;
6149
+ PyObject *ret = NULL;
6150
+ PyObject *py_ref, *py_nodes, *py_missing;
6151
+ PyArrayObject *nodes_array = NULL;
6152
+ const char *ref_seq;
6153
+ Py_ssize_t ref_len, missing_len;
6154
+ tsk_id_t *nodes;
6155
+ tsk_size_t num_nodes;
6156
+ double left, right;
6157
+ char missing_char;
6158
+ const char *missing_utf8;
6159
+ int isolated_as_missing = 1;
6160
+ tsk_flags_t options = 0;
6161
+ PyObject *buf_obj = NULL;
6162
+ char *buf = NULL;
6163
+
6164
+ static char *kwlist[] = { "reference_sequence", "nodes", "left", "right",
6165
+ "missing_data_character", "isolated_as_missing", NULL };
6166
+
6167
+ if (TreeSequence_check_state(self) != 0) {
6168
+ goto out;
6169
+ }
6170
+
6171
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOddOp", kwlist, &py_ref, &py_nodes,
6172
+ &left, &right, &py_missing, &isolated_as_missing)) {
6173
+ goto out;
6174
+ }
6175
+
6176
+ if (!PyBytes_Check(py_ref)) {
6177
+ PyErr_SetString(PyExc_TypeError, "reference_sequence must be bytes");
6178
+ goto out;
6179
+ }
6180
+ if (PyBytes_AsStringAndSize(py_ref, (char **) &ref_seq, &ref_len) < 0) {
6181
+ goto out;
6182
+ }
6183
+
6184
+ if (!PyUnicode_Check(py_missing)) {
6185
+ PyErr_SetString(
6186
+ PyExc_TypeError, "missing_data_character must be a (length 1) string");
6187
+ goto out;
6188
+ }
6189
+ missing_utf8 = PyUnicode_AsUTF8AndSize(py_missing, &missing_len);
6190
+ if (missing_utf8 == NULL) {
6191
+ goto out;
6192
+ }
6193
+ if (missing_len != 1) {
6194
+ PyErr_SetString(
6195
+ PyExc_TypeError, "missing_data_character must be a single character");
6196
+ goto out;
6197
+ }
6198
+ missing_char = missing_utf8[0];
6199
+
6200
+ if (!isolated_as_missing) {
6201
+ options |= TSK_ISOLATED_NOT_MISSING;
6202
+ }
6203
+
6204
+ nodes_array = (PyArrayObject *) PyArray_FROMANY(
6205
+ py_nodes, NPY_INT32, 1, 1, NPY_ARRAY_IN_ARRAY);
6206
+ if (nodes_array == NULL) {
6207
+ goto out;
6208
+ }
6209
+ num_nodes = (tsk_size_t) PyArray_DIM(nodes_array, 0);
6210
+ nodes = PyArray_DATA(nodes_array);
6211
+
6212
+ buf_obj = PyBytes_FromStringAndSize(
6213
+ NULL, (Py_ssize_t)(num_nodes * (tsk_size_t)(right - left)));
6214
+ if (buf_obj == NULL) {
6215
+ goto out;
6216
+ }
6217
+ buf = PyBytes_AS_STRING(buf_obj);
6218
+
6219
+ // clang-format off
6220
+ Py_BEGIN_ALLOW_THREADS
6221
+ err = tsk_treeseq_decode_alignments(self->tree_sequence,
6222
+ ref_seq, (tsk_size_t) ref_len, nodes, num_nodes, left, right, missing_char, buf,
6223
+ options);
6224
+ Py_END_ALLOW_THREADS
6225
+ // clang-format on
6226
+ if (err != 0)
6227
+ {
6228
+ handle_library_error(err);
6229
+ goto out;
6230
+ }
6231
+
6232
+ ret = buf_obj;
6233
+ buf_obj = NULL;
6234
+
6235
+ out:
6236
+ Py_XDECREF(nodes_array);
6237
+ Py_XDECREF(buf_obj);
6238
+ return ret;
6239
+ }
6240
+
6073
6241
  static PyObject *
6074
6242
  TreeSequence_get_mutations_edge(TreeSequence *self)
6075
6243
  {
@@ -8519,6 +8687,10 @@ static PyMethodDef TreeSequence_methods[] = {
8519
8687
  .ml_meth = (PyCFunction) TreeSequence_dump_tables,
8520
8688
  .ml_flags = METH_VARARGS | METH_KEYWORDS,
8521
8689
  .ml_doc = "Dumps the tree sequence to the specified set of tables" },
8690
+ { .ml_name = "link_ancestors",
8691
+ .ml_meth = (PyCFunction) TreeSequence_link_ancestors,
8692
+ .ml_flags = METH_VARARGS | METH_KEYWORDS,
8693
+ .ml_doc = "Returns an EdgeTable linking the specified samples and ancestors." },
8522
8694
  { .ml_name = "get_node",
8523
8695
  .ml_meth = (PyCFunction) TreeSequence_get_node,
8524
8696
  .ml_flags = METH_VARARGS,
@@ -8651,6 +8823,10 @@ static PyMethodDef TreeSequence_methods[] = {
8651
8823
  .ml_meth = (PyCFunction) TreeSequence_get_individuals_nodes,
8652
8824
  .ml_flags = METH_NOARGS,
8653
8825
  .ml_doc = "Returns an array of the node ids for each individual" },
8826
+ { .ml_name = "decode_alignments",
8827
+ .ml_meth = (PyCFunction) TreeSequence_decode_alignments,
8828
+ .ml_flags = METH_VARARGS | METH_KEYWORDS,
8829
+ .ml_doc = "Decode full alignments for given nodes and interval." },
8654
8830
  { .ml_name = "get_mutations_edge",
8655
8831
  .ml_meth = (PyCFunction) TreeSequence_get_mutations_edge,
8656
8832
  .ml_flags = METH_NOARGS,
@@ -584,6 +584,14 @@ tsk_strerror_internal(int err)
584
584
  ret = "Must have at least one allele when specifying an allele map. "
585
585
  "(TSK_ERR_ZERO_ALLELES)";
586
586
  break;
587
+ case TSK_ERR_BAD_ALLELE_LENGTH:
588
+ ret = "Alleles used when decoding alignments must have length one. "
589
+ "(TSK_ERR_BAD_ALLELE_LENGTH)";
590
+ break;
591
+ case TSK_ERR_MISSING_CHAR_COLLISION:
592
+ ret = "Alleles used when decoding alignments must not match the missing "
593
+ "data character. (TSK_ERR_MISSING_CHAR_COLLISION)";
594
+ break;
587
595
 
588
596
  /* Distance metric errors */
589
597
  case TSK_ERR_SAMPLE_SIZE_MISMATCH:
@@ -1033,7 +1041,7 @@ FILE *
1033
1041
  tsk_get_debug_stream(void)
1034
1042
  {
1035
1043
  if (_tsk_debug_stream == NULL) {
1036
- _tsk_debug_stream = stdout;
1044
+ _tsk_debug_stream = TSK_DEFAULT_DEBUG_STREAM;
1037
1045
  }
1038
1046
  return _tsk_debug_stream;
1039
1047
  }
@@ -1260,16 +1268,16 @@ tsk_avl_tree_int_ordered_nodes(const tsk_avl_tree_int_t *self, tsk_avl_node_int_
1260
1268
  }
1261
1269
 
1262
1270
  // Bit Array implementation. Allows us to store unsigned integers in a compact manner.
1263
- // Currently implemented as an array of 32-bit unsigned integers for ease of counting.
1271
+ // Currently implemented as an array of 32-bit unsigned integers.
1264
1272
 
1265
1273
  int
1266
- tsk_bit_array_init(tsk_bit_array_t *self, tsk_size_t num_bits, tsk_size_t length)
1274
+ tsk_bitset_init(tsk_bitset_t *self, tsk_size_t num_bits, tsk_size_t length)
1267
1275
  {
1268
1276
  int ret = 0;
1269
1277
 
1270
- self->size = (num_bits >> TSK_BIT_ARRAY_CHUNK)
1271
- + (num_bits % TSK_BIT_ARRAY_NUM_BITS ? 1 : 0);
1272
- self->data = tsk_calloc(self->size * length, sizeof(*self->data));
1278
+ self->row_len = (num_bits / TSK_BITSET_BITS) + (num_bits % TSK_BITSET_BITS ? 1 : 0);
1279
+ self->len = length;
1280
+ self->data = tsk_calloc(self->row_len * length, sizeof(*self->data));
1273
1281
  if (self->data == NULL) {
1274
1282
  ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
1275
1283
  goto out;
@@ -1278,96 +1286,111 @@ out:
1278
1286
  return ret;
1279
1287
  }
1280
1288
 
1281
- void
1282
- tsk_bit_array_get_row(const tsk_bit_array_t *self, tsk_size_t row, tsk_bit_array_t *out)
1283
- {
1284
- out->size = self->size;
1285
- out->data = self->data + (row * self->size);
1286
- }
1289
+ #define BITSET_DATA_ROW(bs, row) ((bs)->data + (row) * (bs)->row_len)
1287
1290
 
1288
1291
  void
1289
- tsk_bit_array_intersect(
1290
- const tsk_bit_array_t *self, const tsk_bit_array_t *other, tsk_bit_array_t *out)
1292
+ tsk_bitset_intersect(const tsk_bitset_t *self, tsk_size_t self_row,
1293
+ const tsk_bitset_t *other, tsk_size_t other_row, tsk_bitset_t *out)
1291
1294
  {
1292
- for (tsk_size_t i = 0; i < self->size; i++) {
1293
- out->data[i] = self->data[i] & other->data[i];
1295
+ const tsk_bitset_val_t *restrict self_d = BITSET_DATA_ROW(self, self_row);
1296
+ const tsk_bitset_val_t *restrict other_d = BITSET_DATA_ROW(other, other_row);
1297
+ tsk_bitset_val_t *restrict out_d = out->data;
1298
+ for (tsk_size_t i = 0; i < self->row_len; i++) {
1299
+ out_d[i] = self_d[i] & other_d[i];
1294
1300
  }
1295
1301
  }
1296
1302
 
1297
1303
  void
1298
- tsk_bit_array_subtract(tsk_bit_array_t *self, const tsk_bit_array_t *other)
1304
+ tsk_bitset_subtract(tsk_bitset_t *self, tsk_size_t self_row, const tsk_bitset_t *other,
1305
+ tsk_size_t other_row)
1299
1306
  {
1300
- for (tsk_size_t i = 0; i < self->size; i++) {
1301
- self->data[i] &= ~(other->data[i]);
1307
+ tsk_bitset_val_t *restrict self_d = BITSET_DATA_ROW(self, self_row);
1308
+ const tsk_bitset_val_t *restrict other_d = BITSET_DATA_ROW(other, other_row);
1309
+ for (tsk_size_t i = 0; i < self->row_len; i++) {
1310
+ self_d[i] &= ~(other_d[i]);
1302
1311
  }
1303
1312
  }
1304
1313
 
1305
1314
  void
1306
- tsk_bit_array_add(tsk_bit_array_t *self, const tsk_bit_array_t *other)
1315
+ tsk_bitset_union(tsk_bitset_t *self, tsk_size_t self_row, const tsk_bitset_t *other,
1316
+ tsk_size_t other_row)
1307
1317
  {
1308
- for (tsk_size_t i = 0; i < self->size; i++) {
1309
- self->data[i] |= other->data[i];
1318
+ tsk_bitset_val_t *restrict self_d = BITSET_DATA_ROW(self, self_row);
1319
+ const tsk_bitset_val_t *restrict other_d = BITSET_DATA_ROW(other, other_row);
1320
+ for (tsk_size_t i = 0; i < self->row_len; i++) {
1321
+ self_d[i] |= other_d[i];
1310
1322
  }
1311
1323
  }
1312
1324
 
1313
1325
  void
1314
- tsk_bit_array_add_bit(tsk_bit_array_t *self, const tsk_bit_array_value_t bit)
1326
+ tsk_bitset_set_bit(tsk_bitset_t *self, tsk_size_t row, const tsk_bitset_val_t bit)
1315
1327
  {
1316
- tsk_bit_array_value_t i = bit >> TSK_BIT_ARRAY_CHUNK;
1317
- self->data[i] |= (tsk_bit_array_value_t) 1 << (bit - (TSK_BIT_ARRAY_NUM_BITS * i));
1328
+ tsk_bitset_val_t i = (bit / TSK_BITSET_BITS);
1329
+ *(BITSET_DATA_ROW(self, row) + i) |= (tsk_bitset_val_t) 1
1330
+ << (bit - (TSK_BITSET_BITS * i));
1318
1331
  }
1319
1332
 
1320
1333
  bool
1321
- tsk_bit_array_contains(const tsk_bit_array_t *self, const tsk_bit_array_value_t bit)
1334
+ tsk_bitset_contains(const tsk_bitset_t *self, tsk_size_t row, const tsk_bitset_val_t bit)
1322
1335
  {
1323
- tsk_bit_array_value_t i = bit >> TSK_BIT_ARRAY_CHUNK;
1324
- return self->data[i]
1325
- & ((tsk_bit_array_value_t) 1 << (bit - (TSK_BIT_ARRAY_NUM_BITS * i)));
1336
+ tsk_bitset_val_t i = (bit / TSK_BITSET_BITS);
1337
+ return *(BITSET_DATA_ROW(self, row) + i)
1338
+ & ((tsk_bitset_val_t) 1 << (bit - (TSK_BITSET_BITS * i)));
1326
1339
  }
1327
1340
 
1328
- tsk_size_t
1329
- tsk_bit_array_count(const tsk_bit_array_t *self)
1341
+ static inline uint32_t
1342
+ popcount(tsk_bitset_val_t v)
1330
1343
  {
1331
- // Utilizes 12 operations per bit array. NB this only works on 32 bit integers.
1344
+ // Utilizes 12 operations per chunk. NB this only works on 32 bit integers.
1332
1345
  // Taken from:
1333
1346
  // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
1334
1347
  // There's a nice breakdown of this algorithm here:
1335
1348
  // https://stackoverflow.com/a/109025
1336
- // Could probably do better with explicit SIMD (instead of SWAR), but not as
1337
- // portable: https://arxiv.org/pdf/1611.07612.pdf
1338
1349
  //
1339
- // There is one solution to explore further, which uses __builtin_popcountll.
1340
- // This option is relatively simple, but requires a 64 bit bit array and also
1341
- // involves some compiler flag plumbing (-mpopcnt)
1350
+ // The gcc/clang compiler flag -mpopcnt will convert this code to a
1351
+ // popcnt instruction (most if not all modern CPUs will support this). The
1352
+ // popcnt instruction will yield some speed improvements, which depend on
1353
+ // the tree sequence.
1354
+ //
1355
+ // NB: 32bit counting is typically faster than 64bit counting for this task.
1356
+ // (at least on x86-64)
1357
+
1358
+ v = v - ((v >> 1) & 0x55555555);
1359
+ v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
1360
+ return (((v + (v >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
1361
+ }
1342
1362
 
1343
- tsk_bit_array_value_t tmp;
1344
- tsk_size_t i, count = 0;
1363
+ tsk_size_t
1364
+ tsk_bitset_count(const tsk_bitset_t *self, tsk_size_t row)
1365
+ {
1366
+ tsk_size_t i = 0;
1367
+ tsk_size_t count = 0;
1368
+ const tsk_bitset_val_t *restrict self_d = BITSET_DATA_ROW(self, row);
1345
1369
 
1346
- for (i = 0; i < self->size; i++) {
1347
- tmp = self->data[i] - ((self->data[i] >> 1) & 0x55555555);
1348
- tmp = (tmp & 0x33333333) + ((tmp >> 2) & 0x33333333);
1349
- count += (((tmp + (tmp >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
1370
+ for (i = 0; i < self->row_len; i++) {
1371
+ count += popcount(self_d[i]);
1350
1372
  }
1351
1373
  return count;
1352
1374
  }
1353
1375
 
1354
1376
  void
1355
- tsk_bit_array_get_items(
1356
- const tsk_bit_array_t *self, tsk_id_t *items, tsk_size_t *n_items)
1377
+ tsk_bitset_get_items(
1378
+ const tsk_bitset_t *self, tsk_size_t row, tsk_id_t *items, tsk_size_t *n_items)
1357
1379
  {
1358
1380
  // Get the items stored in the row of a bitset.
1359
- // Uses a de Bruijn sequence lookup table to determine the lowest bit set. See the
1360
- // wikipedia article for more info: https://w.wiki/BYiF
1381
+ // Uses a de Bruijn sequence lookup table to determine the lowest bit set.
1382
+ // See the wikipedia article for more info: https://w.wiki/BYiF
1361
1383
 
1362
1384
  tsk_size_t i, n, off;
1363
- tsk_bit_array_value_t v, lsb; // least significant bit
1385
+ tsk_bitset_val_t v, lsb; // least significant bit
1364
1386
  static const tsk_id_t lookup[32] = { 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25,
1365
1387
  17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 };
1388
+ const tsk_bitset_val_t *restrict self_d = BITSET_DATA_ROW(self, row);
1366
1389
 
1367
1390
  n = 0;
1368
- for (i = 0; i < self->size; i++) {
1369
- v = self->data[i];
1370
- off = i * ((tsk_size_t) TSK_BIT_ARRAY_NUM_BITS);
1391
+ for (i = 0; i < self->row_len; i++) {
1392
+ v = self_d[i];
1393
+ off = i * TSK_BITSET_BITS;
1371
1394
  if (v == 0) {
1372
1395
  continue;
1373
1396
  }
@@ -1381,7 +1404,7 @@ tsk_bit_array_get_items(
1381
1404
  }
1382
1405
 
1383
1406
  void
1384
- tsk_bit_array_free(tsk_bit_array_t *self)
1407
+ tsk_bitset_free(tsk_bitset_t *self)
1385
1408
  {
1386
1409
  tsk_safe_free(self->data);
1387
1410
  }
@@ -147,7 +147,7 @@ sizes and types of externally visible structs.
147
147
  The library minor version. Incremented when non-breaking backward-compatible changes
148
148
  to the API or ABI are introduced, i.e., the addition of a new function.
149
149
  */
150
- #define TSK_VERSION_MINOR 2
150
+ #define TSK_VERSION_MINOR 3
151
151
  /**
152
152
  The library patch version. Incremented when any changes not relevant to the
153
153
  API or ABI are introduced, i.e., internal refactors or bugfixes.
@@ -511,7 +511,7 @@ disallowed (use compute_mutation_times?).
511
511
  */
512
512
  #define TSK_ERR_DISALLOWED_UNKNOWN_MUTATION_TIME -510
513
513
 
514
- /**
514
+ /**
515
515
  A mutation's parent was not consistent with the topology of the tree.
516
516
  */
517
517
  #define TSK_ERR_BAD_MUTATION_PARENT -511
@@ -803,6 +803,14 @@ More than 2147483647 alleles were specified.
803
803
  A user-specified allele map was used, but it contained zero alleles.
804
804
  */
805
805
  #define TSK_ERR_ZERO_ALLELES -1103
806
+ /**
807
+ An allele used when decoding alignments had length other than one.
808
+ */
809
+ #define TSK_ERR_BAD_ALLELE_LENGTH -1104
810
+ /**
811
+ An allele used when decoding alignments matched the missing data character.
812
+ */
813
+ #define TSK_ERR_MISSING_CHAR_COLLISION -1105
806
814
  /** @} */
807
815
 
808
816
  /**
@@ -963,6 +971,12 @@ not be freed by client code.
963
971
  */
964
972
  const char *tsk_strerror(int err);
965
973
 
974
+ /* Redefine this macro in downstream builds if stdout is not the
975
+ * appropriate stream to emit debug information when the TSK_DEBUG
976
+ * flag is passed to supporting functions (e.g. in R).
977
+ */
978
+ #define TSK_DEFAULT_DEBUG_STREAM stdout
979
+
966
980
  #ifdef TSK_TRACE_ERRORS
967
981
 
968
982
  static inline int
@@ -973,6 +987,11 @@ _tsk_trace_error(int err, int line, const char *file)
973
987
  return err;
974
988
  }
975
989
 
990
+ /*
991
+ Developer note: this macro may be redefined as part of compilation for
992
+ an R package, and should be treated as part of the documented API
993
+ (no changes).
994
+ */
976
995
  #define tsk_trace_error(err) (_tsk_trace_error(err, __LINE__, __FILE__))
977
996
  #else
978
997
  #define tsk_trace_error(err) (err)
@@ -993,6 +1012,11 @@ means compiling without NDEBUG. This macro still asserts when NDEBUG is defined.
993
1012
  If you are using this macro in your own software then please set TSK_BUG_ASSERT_MESSAGE
994
1013
  to point users to your issue tracker.
995
1014
  */
1015
+ /*
1016
+ Developer note: this macro may be redefined as part of compilation for
1017
+ an R package, and should be treated as part of the documented API
1018
+ (no changes).
1019
+ */
996
1020
  #define tsk_bug_assert(condition) \
997
1021
  do { \
998
1022
  if (!(condition)) { \
@@ -1104,29 +1128,31 @@ FILE *tsk_get_debug_stream(void);
1104
1128
 
1105
1129
  /* Bit Array functionality */
1106
1130
 
1107
- typedef uint32_t tsk_bit_array_value_t;
1131
+ // define a 32-bit chunk size for our bitsets.
1132
+ // this means we'll be able to hold 32 distinct items in each 32 bit uint
1133
+ #define TSK_BITSET_BITS ((tsk_size_t) 32)
1134
+ typedef uint32_t tsk_bitset_val_t;
1135
+
1108
1136
  typedef struct {
1109
- tsk_size_t size; // Number of chunks per row
1110
- tsk_bit_array_value_t *data; // Array data
1111
- } tsk_bit_array_t;
1112
-
1113
- #define TSK_BIT_ARRAY_CHUNK 5U
1114
- #define TSK_BIT_ARRAY_NUM_BITS (1U << TSK_BIT_ARRAY_CHUNK)
1115
-
1116
- int tsk_bit_array_init(tsk_bit_array_t *self, tsk_size_t num_bits, tsk_size_t length);
1117
- void tsk_bit_array_free(tsk_bit_array_t *self);
1118
- void tsk_bit_array_get_row(
1119
- const tsk_bit_array_t *self, tsk_size_t row, tsk_bit_array_t *out);
1120
- void tsk_bit_array_intersect(
1121
- const tsk_bit_array_t *self, const tsk_bit_array_t *other, tsk_bit_array_t *out);
1122
- void tsk_bit_array_subtract(tsk_bit_array_t *self, const tsk_bit_array_t *other);
1123
- void tsk_bit_array_add(tsk_bit_array_t *self, const tsk_bit_array_t *other);
1124
- void tsk_bit_array_add_bit(tsk_bit_array_t *self, const tsk_bit_array_value_t bit);
1125
- bool tsk_bit_array_contains(
1126
- const tsk_bit_array_t *self, const tsk_bit_array_value_t bit);
1127
- tsk_size_t tsk_bit_array_count(const tsk_bit_array_t *self);
1128
- void tsk_bit_array_get_items(
1129
- const tsk_bit_array_t *self, tsk_id_t *items, tsk_size_t *n_items);
1137
+ tsk_size_t row_len; // Number of size TSK_BITSET_BITS chunks per row
1138
+ tsk_size_t len; // Number of rows
1139
+ tsk_bitset_val_t *data;
1140
+ } tsk_bitset_t;
1141
+
1142
+ int tsk_bitset_init(tsk_bitset_t *self, tsk_size_t num_bits, tsk_size_t length);
1143
+ void tsk_bitset_free(tsk_bitset_t *self);
1144
+ void tsk_bitset_intersect(const tsk_bitset_t *self, tsk_size_t self_row,
1145
+ const tsk_bitset_t *other, tsk_size_t other_row, tsk_bitset_t *out);
1146
+ void tsk_bitset_subtract(tsk_bitset_t *self, tsk_size_t self_row,
1147
+ const tsk_bitset_t *other, tsk_size_t other_row);
1148
+ void tsk_bitset_union(tsk_bitset_t *self, tsk_size_t self_row, const tsk_bitset_t *other,
1149
+ tsk_size_t other_row);
1150
+ void tsk_bitset_set_bit(tsk_bitset_t *self, tsk_size_t row, const tsk_bitset_val_t bit);
1151
+ bool tsk_bitset_contains(
1152
+ const tsk_bitset_t *self, tsk_size_t row, const tsk_bitset_val_t bit);
1153
+ tsk_size_t tsk_bitset_count(const tsk_bitset_t *self, tsk_size_t row);
1154
+ void tsk_bitset_get_items(
1155
+ const tsk_bitset_t *self, tsk_size_t row, tsk_id_t *items, tsk_size_t *n_items);
1130
1156
 
1131
1157
  #ifdef __cplusplus
1132
1158
  }