stdvoidsim 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- stdvoidsim/__init__.py +30 -0
- stdvoidsim/__main__.py +4 -0
- stdvoidsim/_version.py +34 -0
- stdvoidsim/annotations.py +90 -0
- stdvoidsim/cache.py +178 -0
- stdvoidsim/catalog/AzaPri/__init__.py +6 -0
- stdvoidsim/catalog/AzaPri/demographic_models.py +120 -0
- stdvoidsim/catalog/AzaPri/genome_data.py +10 -0
- stdvoidsim/catalog/AzaPri/species.py +65 -0
- stdvoidsim/catalog/BybWor/__init__.py +6 -0
- stdvoidsim/catalog/BybWor/demographic_models.py +158 -0
- stdvoidsim/catalog/BybWor/genome_data.py +43 -0
- stdvoidsim/catalog/BybWor/species.py +83 -0
- stdvoidsim/catalog/CatUlt/__init__.py +6 -0
- stdvoidsim/catalog/CatUlt/demographic_models.py +184 -0
- stdvoidsim/catalog/CatUlt/genome_data.py +38 -0
- stdvoidsim/catalog/CatUlt/species.py +69 -0
- stdvoidsim/catalog/ChaFau/__init__.py +6 -0
- stdvoidsim/catalog/ChaFau/demographic_models.py +188 -0
- stdvoidsim/catalog/ChaFau/genome_data.py +34 -0
- stdvoidsim/catalog/ChaFau/species.py +70 -0
- stdvoidsim/catalog/ColOos/__init__.py +6 -0
- stdvoidsim/catalog/ColOos/demographic_models.py +102 -0
- stdvoidsim/catalog/ColOos/genome_data.py +11 -0
- stdvoidsim/catalog/ColOos/species.py +50 -0
- stdvoidsim/catalog/CthGre/__init__.py +6 -0
- stdvoidsim/catalog/CthGre/demographic_models.py +175 -0
- stdvoidsim/catalog/CthGre/genome_data.py +38 -0
- stdvoidsim/catalog/CthGre/species.py +101 -0
- stdvoidsim/catalog/DagGod/__init__.py +6 -0
- stdvoidsim/catalog/DagGod/demographic_models.py +185 -0
- stdvoidsim/catalog/DagGod/genome_data.py +54 -0
- stdvoidsim/catalog/DagGod/species.py +72 -0
- stdvoidsim/catalog/DagHyd/__init__.py +6 -0
- stdvoidsim/catalog/DagHyd/demographic_models.py +184 -0
- stdvoidsim/catalog/DagHyd/genome_data.py +59 -0
- stdvoidsim/catalog/DagHyd/species.py +64 -0
- stdvoidsim/catalog/DarYou/__init__.py +6 -0
- stdvoidsim/catalog/DarYou/demographic_models.py +171 -0
- stdvoidsim/catalog/DarYou/genome_data.py +38 -0
- stdvoidsim/catalog/DarYou/species.py +101 -0
- stdvoidsim/catalog/DhoGno/__init__.py +6 -0
- stdvoidsim/catalog/DhoGno/demographic_models.py +103 -0
- stdvoidsim/catalog/DhoGno/genome_data.py +15 -0
- stdvoidsim/catalog/DhoGno/species.py +51 -0
- stdvoidsim/catalog/DimSha/__init__.py +6 -0
- stdvoidsim/catalog/DimSha/demographic_models.py +102 -0
- stdvoidsim/catalog/DimSha/genome_data.py +12 -0
- stdvoidsim/catalog/DimSha/species.py +51 -0
- stdvoidsim/catalog/EldThi/__init__.py +6 -0
- stdvoidsim/catalog/EldThi/demographic_models.py +176 -0
- stdvoidsim/catalog/EldThi/genome_data.py +39 -0
- stdvoidsim/catalog/EldThi/species.py +64 -0
- stdvoidsim/catalog/FirVam/__init__.py +6 -0
- stdvoidsim/catalog/FirVam/demographic_models.py +174 -0
- stdvoidsim/catalog/FirVam/genome_data.py +22 -0
- stdvoidsim/catalog/FirVam/species.py +88 -0
- stdvoidsim/catalog/FlyPol/__init__.py +6 -0
- stdvoidsim/catalog/FlyPol/demographic_models.py +103 -0
- stdvoidsim/catalog/FlyPol/genome_data.py +13 -0
- stdvoidsim/catalog/FlyPol/species.py +51 -0
- stdvoidsim/catalog/ForSpa/__init__.py +6 -0
- stdvoidsim/catalog/ForSpa/demographic_models.py +174 -0
- stdvoidsim/catalog/ForSpa/genome_data.py +26 -0
- stdvoidsim/catalog/ForSpa/species.py +91 -0
- stdvoidsim/catalog/GhaShe/__init__.py +6 -0
- stdvoidsim/catalog/GhaShe/demographic_models.py +102 -0
- stdvoidsim/catalog/GhaShe/genome_data.py +14 -0
- stdvoidsim/catalog/GhaShe/species.py +51 -0
- stdvoidsim/catalog/GhoFee/__init__.py +6 -0
- stdvoidsim/catalog/GhoFee/demographic_models.py +166 -0
- stdvoidsim/catalog/GhoFee/genome_data.py +51 -0
- stdvoidsim/catalog/GhoFee/species.py +88 -0
- stdvoidsim/catalog/GnpKeh/__init__.py +6 -0
- stdvoidsim/catalog/GnpKeh/demographic_models.py +174 -0
- stdvoidsim/catalog/GnpKeh/genome_data.py +38 -0
- stdvoidsim/catalog/GnpKeh/species.py +100 -0
- stdvoidsim/catalog/GugsUn/__init__.py +6 -0
- stdvoidsim/catalog/GugsUn/demographic_models.py +102 -0
- stdvoidsim/catalog/GugsUn/genome_data.py +13 -0
- stdvoidsim/catalog/GugsUn/species.py +51 -0
- stdvoidsim/catalog/HasKin/__init__.py +6 -0
- stdvoidsim/catalog/HasKin/demographic_models.py +153 -0
- stdvoidsim/catalog/HasKin/genome_data.py +47 -0
- stdvoidsim/catalog/HasKin/species.py +86 -0
- stdvoidsim/catalog/HouFir/__init__.py +6 -0
- stdvoidsim/catalog/HouFir/demographic_models.py +102 -0
- stdvoidsim/catalog/HouFir/genome_data.py +12 -0
- stdvoidsim/catalog/HouFir/species.py +51 -0
- stdvoidsim/catalog/HunTin/__init__.py +6 -0
- stdvoidsim/catalog/HunTin/demographic_models.py +171 -0
- stdvoidsim/catalog/HunTin/genome_data.py +30 -0
- stdvoidsim/catalog/HunTin/species.py +95 -0
- stdvoidsim/catalog/LenSpi/__init__.py +6 -0
- stdvoidsim/catalog/LenSpi/demographic_models.py +172 -0
- stdvoidsim/catalog/LenSpi/genome_data.py +34 -0
- stdvoidsim/catalog/LenSpi/species.py +98 -0
- stdvoidsim/catalog/MiGFun/__init__.py +6 -0
- stdvoidsim/catalog/MiGFun/demographic_models.py +159 -0
- stdvoidsim/catalog/MiGFun/genome_data.py +39 -0
- stdvoidsim/catalog/MiGFun/species.py +79 -0
- stdvoidsim/catalog/MooFun/__init__.py +6 -0
- stdvoidsim/catalog/MooFun/demographic_models.py +102 -0
- stdvoidsim/catalog/MooFun/genome_data.py +13 -0
- stdvoidsim/catalog/MooFun/species.py +51 -0
- stdvoidsim/catalog/NigMan/__init__.py +6 -0
- stdvoidsim/catalog/NigMan/demographic_models.py +102 -0
- stdvoidsim/catalog/NigMan/genome_data.py +15 -0
- stdvoidsim/catalog/NigMan/species.py +51 -0
- stdvoidsim/catalog/NyaAza/__init__.py +6 -0
- stdvoidsim/catalog/NyaAza/demographic_models.py +154 -0
- stdvoidsim/catalog/NyaAza/genome_data.py +48 -0
- stdvoidsim/catalog/NyaAza/species.py +84 -0
- stdvoidsim/catalog/RatThi/__init__.py +6 -0
- stdvoidsim/catalog/RatThi/demographic_models.py +174 -0
- stdvoidsim/catalog/RatThi/genome_data.py +42 -0
- stdvoidsim/catalog/RatThi/species.py +103 -0
- stdvoidsim/catalog/SanDre/__init__.py +6 -0
- stdvoidsim/catalog/SanDre/demographic_models.py +102 -0
- stdvoidsim/catalog/SanDre/genome_data.py +14 -0
- stdvoidsim/catalog/SanDre/species.py +51 -0
- stdvoidsim/catalog/SanDwl/__init__.py +6 -0
- stdvoidsim/catalog/SanDwl/demographic_models.py +185 -0
- stdvoidsim/catalog/SanDwl/genome_data.py +30 -0
- stdvoidsim/catalog/SanDwl/species.py +69 -0
- stdvoidsim/catalog/SerHum/__init__.py +6 -0
- stdvoidsim/catalog/SerHum/demographic_models.py +102 -0
- stdvoidsim/catalog/SerHum/genome_data.py +16 -0
- stdvoidsim/catalog/SerHum/species.py +51 -0
- stdvoidsim/catalog/ShbNig/__init__.py +6 -0
- stdvoidsim/catalog/ShbNig/demographic_models.py +108 -0
- stdvoidsim/catalog/ShbNig/genome_data.py +18 -0
- stdvoidsim/catalog/ShbNig/species.py +53 -0
- stdvoidsim/catalog/ShoNig/__init__.py +6 -0
- stdvoidsim/catalog/ShoNig/demographic_models.py +169 -0
- stdvoidsim/catalog/ShoNig/genome_data.py +63 -0
- stdvoidsim/catalog/ShoNig/species.py +97 -0
- stdvoidsim/catalog/StarSp/__init__.py +6 -0
- stdvoidsim/catalog/StarSp/demographic_models.py +102 -0
- stdvoidsim/catalog/StarSp/genome_data.py +14 -0
- stdvoidsim/catalog/StarSp/species.py +51 -0
- stdvoidsim/catalog/TsaCho/__init__.py +6 -0
- stdvoidsim/catalog/TsaCho/demographic_models.py +174 -0
- stdvoidsim/catalog/TsaCho/genome_data.py +46 -0
- stdvoidsim/catalog/TsaCho/species.py +107 -0
- stdvoidsim/catalog/TsaGod/__init__.py +6 -0
- stdvoidsim/catalog/TsaGod/demographic_models.py +174 -0
- stdvoidsim/catalog/TsaGod/genome_data.py +30 -0
- stdvoidsim/catalog/TsaGod/species.py +95 -0
- stdvoidsim/catalog/WamUnd/__init__.py +6 -0
- stdvoidsim/catalog/WamUnd/demographic_models.py +185 -0
- stdvoidsim/catalog/WamUnd/genome_data.py +46 -0
- stdvoidsim/catalog/WamUnd/species.py +70 -0
- stdvoidsim/catalog/YitGre/__init__.py +6 -0
- stdvoidsim/catalog/YitGre/demographic_models.py +172 -0
- stdvoidsim/catalog/YitGre/genome_data.py +42 -0
- stdvoidsim/catalog/YitGre/species.py +104 -0
- stdvoidsim/catalog/YogSot/__init__.py +6 -0
- stdvoidsim/catalog/YogSot/demographic_models.py +135 -0
- stdvoidsim/catalog/YogSot/genome_data.py +28 -0
- stdvoidsim/catalog/YogSot/species.py +54 -0
- stdvoidsim/catalog/ZooGul/__init__.py +6 -0
- stdvoidsim/catalog/ZooGul/demographic_models.py +102 -0
- stdvoidsim/catalog/ZooGul/genome_data.py +14 -0
- stdvoidsim/catalog/ZooGul/species.py +51 -0
- stdvoidsim/catalog/__init__.py +8 -0
- stdvoidsim/citations.py +120 -0
- stdvoidsim/cli.py +1157 -0
- stdvoidsim/dfe.py +380 -0
- stdvoidsim/engines.py +334 -0
- stdvoidsim/genetic_maps.py +132 -0
- stdvoidsim/genomes.py +903 -0
- stdvoidsim/models.py +374 -0
- stdvoidsim/qc/__init__.py +1 -0
- stdvoidsim/selection.py +397 -0
- stdvoidsim/slim_engine.py +2072 -0
- stdvoidsim/species.py +381 -0
- stdvoidsim/utils.py +359 -0
- stdvoidsim/warning_categories.py +32 -0
- stdvoidsim-0.1.1.dist-info/METADATA +232 -0
- stdvoidsim-0.1.1.dist-info/RECORD +185 -0
- stdvoidsim-0.1.1.dist-info/WHEEL +5 -0
- stdvoidsim-0.1.1.dist-info/entry_points.txt +2 -0
- stdvoidsim-0.1.1.dist-info/licenses/LICENSE +674 -0
- stdvoidsim-0.1.1.dist-info/top_level.txt +1 -0
stdvoidsim/__init__.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Main entry point for stdvoidsim (fork of stdpopsim for Lovecraftian entities)
|
|
2
|
+
|
|
3
|
+
# Take the package version from the setuptools-scm generated ``_version``
# module; if that module cannot be imported, use the "undefined" placeholder.
try:
    from . import _version
except ImportError:
    __version__ = "undefined"
else:
    __version__ = _version.version
|
|
10
|
+
|
|
11
|
+
# Internal modules. Import here to flatten the namespace.
|
|
12
|
+
from .genetic_maps import * # NOQA
|
|
13
|
+
from .models import * # NOQA
|
|
14
|
+
from .species import * # NOQA
|
|
15
|
+
from .genomes import * # NOQA
|
|
16
|
+
from .annotations import * # NOQA
|
|
17
|
+
from .dfe import * # NOQA
|
|
18
|
+
from .cache import * # NOQA
|
|
19
|
+
from .citations import * # NOQA
|
|
20
|
+
from .engines import * # NOQA
|
|
21
|
+
from .warning_categories import * # NOQA
|
|
22
|
+
|
|
23
|
+
# We import catalog here, but the internal functions
|
|
24
|
+
# defined are not part of the external API.
|
|
25
|
+
from .catalog import * # NOQA
|
|
26
|
+
|
|
27
|
+
from . import qc # NOQA
|
|
28
|
+
|
|
29
|
+
from .selection import * # NOQA
|
|
30
|
+
from .slim_engine import * # NOQA
|
stdvoidsim/__main__.py
ADDED
stdvoidsim/_version.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# file generated by setuptools-scm
|
|
2
|
+
# don't change, don't track in version control
|
|
3
|
+
|
|
4
|
+
# Names exported by ``from _version import *``.
__all__ = [
    "__version__",
    "__version_tuple__",
    "version",
    "version_tuple",
    "__commit_id__",
    "commit_id",
]

# Module-local flag: it is always False at runtime, so the ``typing`` imports
# in the ``else`` branch below are never executed.
TYPE_CHECKING = False
if not TYPE_CHECKING:
    VERSION_TUPLE = object
    COMMIT_ID = object
else:
    from typing import Tuple, Union

    VERSION_TUPLE = Tuple[Union[int, str], ...]
    COMMIT_ID = Union[str, None]

version: str
__version__: str
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
commit_id: COMMIT_ID
__commit_id__: COMMIT_ID

# Each value is published under both a plain and a dunder name.
version = "0.1.1"
__version__ = version

version_tuple = (0, 1, 1)
__version_tuple__ = version_tuple

commit_id = None
__commit_id__ = commit_id
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Infrastructure for defining information about genome annotation.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
import attr
|
|
8
|
+
import numpy as np
|
|
9
|
+
import stdvoidsim
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@attr.s(kw_only=True)
class Annotation:
    """
    A genome annotation track belonging to a species.

    :ivar str ~.id: String that uniquely identifies the annotation.
    :ivar species: The species to which this annotation applies.
    :vartype species: :class:`.Species`
    :ivar str url: The URL where the packed and compressed GFF3 can be found.
    :ivar gff_sha256: NOTE(review): presumably the SHA256 checksum of the
        GFF3 file at ``url``; it is not read by this class -- confirm.
    :ivar str intervals_url: The URL of the intervals cache of the annotations.
    :ivar str intervals_sha256: The SHA256 checksum of the annotations cache.
    :ivar str ~.description: One line description of the annotation.
    :ivar citations: List of citations for the annotation.
    :vartype citations: list of :class:`.Citation`
    :ivar file_pattern: Format pattern with an ``{id}`` field, used to map a
        chromosome id string to its file inside the cache directory.
    :ivar annotation_source: Not read by this class; presumably names the
        provider of the annotation -- confirm.
    :ivar annotation_type: Not read by this class; presumably names the kind
        of annotated feature -- confirm.
    """

    id = attr.ib()
    species = attr.ib()
    url = attr.ib()
    gff_sha256 = attr.ib()
    intervals_url = attr.ib()
    intervals_sha256 = attr.ib()
    description = attr.ib()
    citations = attr.ib(factory=list)
    file_pattern = attr.ib()
    annotation_source = attr.ib()
    annotation_type = attr.ib()

    def __attrs_post_init__(self):
        # The intervals archive is cached (and extracted) under a namespace
        # built from the species id and the annotation id.
        cache_namespace = f"annotations/{self.species.id}/{self.id}"
        self._cache = stdvoidsim.CachedData(
            namespace=cache_namespace,
            url=self.intervals_url,
            sha256=self.intervals_sha256,
            extract=True,
        )

    @property
    def cache_path(self):
        """Location of this annotation's data inside the local cache."""
        return self._cache.cache_path

    def __str__(self):
        rows = [
            ("species", self.species.name),
            ("id", self.id),
            ("url", self.url),
            ("intervals url", self.intervals_url),
            ("cached", self.is_cached()),
            ("cache_path", self.cache_path),
        ]
        details = "".join(f"\t{label} = {value}\n" for label, value in rows)
        return "GTF Annotation:\n" + details

    def is_cached(self):
        """
        Returns True if this annotation is cached locally.
        """
        return self._cache.is_valid()

    def download(self):
        """
        Downloads the intervals URL and stores it in the cache directory.
        """
        self._cache.download()

    def get_chromosome_annotations(self, id):
        """
        Returns the numpy interval array for the chromosome with the specified id.

        The annotation is downloaded first if it is not already cached.
        Raises ValueError if the loaded array is empty.
        """
        chromosome = self.species.genome.get_chromosome(id)
        if not self.is_cached():
            self.download()
        filename = self.file_pattern.format(id=chromosome.id)
        intervals = np.loadtxt(os.path.join(self.cache_path, filename), dtype="int32")
        if len(intervals) == 0:
            raise ValueError(f"No annotations found for {id}")
        return intervals
|
stdvoidsim/cache.py
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Cache handling for downloaded data.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import pathlib
|
|
6
|
+
import logging
|
|
7
|
+
import os
|
|
8
|
+
import urllib.parse
|
|
9
|
+
import tempfile
|
|
10
|
+
import warnings
|
|
11
|
+
|
|
12
|
+
import appdirs
|
|
13
|
+
import attr
|
|
14
|
+
|
|
15
|
+
from . import utils
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
_cache_dir = None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def set_cache_dir(cache_dir=None):
    """
    Set the directory in which stdvoidsim stores and checks for downloaded data.

    A ``cache_dir`` that is not None is converted to a pathlib.Path instance
    and used as the cache directory. If ``cache_dir`` is None (the default),
    the value of the environment variable `STDPOPSIM_CACHE` is used when it
    is set; otherwise the default location given by the :mod:`appdirs`
    module is used.

    No checks for existence, writability, etc. are performed by this function.
    """
    global _cache_dir
    # Fallback chain: explicit argument -> environment variable -> appdirs.
    if cache_dir is None:
        cache_dir = os.environ.get("STDPOPSIM_CACHE")
    if cache_dir is None:
        cache_dir = appdirs.user_cache_dir("stdvoidsim", "popgensims")
    _cache_dir = pathlib.Path(cache_dir)
    logger.info(f"Set cache_dir to {_cache_dir}")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def get_cache_dir():
    """
    Return the directory used to cache material downloaded by stdvoidsim.

    The value is the pathlib.Path instance most recently stored by
    :func:`.set_cache_dir`. It defaults to a directory 'stdvoidsim' in a user
    cache directory (e.g., ~/.cache/stdvoidsim on Unix flavours). See
    :func:`.set_cache_dir` for how this value can be set.
    """
    return _cache_dir


# Give the cache directory its default value when the module is imported.
set_cache_dir()
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@attr.s(kw_only=True)
class CachedData:
    """
    Downloadable data that will be cached locally.

    The downloadable should be a single file. The local cache may
    correspond to this same file, or to its extracted contents. In the latter
    case, the downloaded file will be removed after archive extraction.
    The downloaded file is compared against the expected SHA256 checksum,
    and if correct, the checksum is then also stored locally.

    :ivar str namespace: The namespace under which the cache will be stored.
        This will be converted into a folder, by constructing folders in the
        cache corresponding to each component of the namespace.
        E.g. if we're on a unix system with cache under ``/path/to/cache``, and
        ``namespace="foo/bar"``, the cached data will live under
        ``/path/to/cache/foo/bar``.
    :ivar str url: The URL of the data to be cached.
    :ivar str sha256: The SHA256 checksum of the downloaded file.
    :ivar bool extract: True if the downloaded file is a tarball that should be
        extracted into the cached namespace, False otherwise.
    """

    namespace = attr.ib(type=str)
    url = attr.ib(type=str)
    sha256 = attr.ib(type=str)
    extract = attr.ib(type=bool)

    def __attrs_post_init__(self):
        # Local file names are derived from the last path component of the URL.
        u = urllib.parse.urlparse(self.url)
        self._basename = pathlib.PurePath(u.path).name

    @property
    def sha256_file(self):
        """Path of the local file in which the checksum is recorded."""
        return get_cache_dir() / self.namespace / f"{self._basename}.sha256"

    @property
    def cache_path(self):
        """Path of the cached data: the namespace folder, or a file inside it."""
        # the cache path could be a folder or a file, depending on self.extract
        path = get_cache_dir() / self.namespace
        if not self.extract:
            path = path / self._basename
        return path

    def is_cached(self):
        """
        Returns True if the data is cached locally.
        """
        return self.cache_path.exists()

    def is_valid(self):
        """
        Returns True if the data is cached and the locally recorded checksum
        equals the expected one. The cached data itself is not re-hashed.
        """
        if not (self.is_cached() and self.sha256_file.exists()):
            return False
        with open(self.sha256_file, "r") as f:
            cached_sha256 = f.read().strip()
        return self.sha256 == cached_sha256

    def download(self):
        """
        Downloads the file from the source URL and stores it in the cache.
        If the local cache already exists, it is first removed.

        If the final move into the cache fails with an OSError, a warning is
        issued and the method returns without recording the checksum.

        :raises ValueError: If the SHA256 checksum of the downloaded file
            differs from the expected ``sha256``.
        """
        if self.is_cached():
            logger.info(f"Clearing cache {self.cache_path}")
            with tempfile.TemporaryDirectory(dir=get_cache_dir()) as tempdir:
                # Atomically move to a temporary directory, which will be
                # automatically deleted on exit.
                dest = pathlib.Path(tempdir) / "will_be_deleted"
                os.rename(self.cache_path, dest)

        self.cache_path.parent.mkdir(parents=True, exist_ok=True)

        logger.info(f"Downloading {self.url}")
        # os.rename will not work on some Unixes if the source and dest are on
        # different file systems. Keep the tempdir in the same directory as
        # the destination to ensure it's on the same file system.
        with tempfile.TemporaryDirectory(dir=get_cache_dir()) as tempdir:
            tempdir = pathlib.Path(tempdir)
            local_path = tempdir / "downloaded"
            utils.download(self.url, local_path)

            logger.debug("Checking SHA256")
            download_sha256 = utils.sha256(local_path)
            if download_sha256 != self.sha256:
                # TODO: use a more appropriate exception here.
                # Adjacent string literals are joined with no separator, so
                # the first fragment must end with a space.
                raise ValueError(
                    f"Expected SHA256={self.sha256}, but downloaded file has "
                    f"{download_sha256}."
                )

            if self.extract:
                extract_dir = tempdir / "extracted"
                extract_dir.mkdir()
                logger.debug(f"Extracting {local_path}")
                utils.untar(local_path, extract_dir)
                local_path = extract_dir

            # If this has all gone OK up to here we can now move the
            # data into the cache location. This should minimise the
            # chances of having malformed data in the cache.
            logger.info(f"Saving to {self.cache_path}")
            # os.rename is atomic, and will raise an OSError if the destination
            # is a directory and already exists. Therefore, if we see the map
            # exists we assume that some other process has already downloaded
            # it, and raise a warning.
            # If the source and destination are regular files (such as when
            # self.extract==False), the destination will be silently replaced
            # on unix systems, but FileExistsError will be raised on windows.
            try:
                os.rename(local_path, self.cache_path)
            except OSError:
                # FileExistsError is a subclass of OSError, so this single
                # clause also covers the Windows case described above.
                warnings.warn(
                    "Error occured renaming map directory. Are multiple processes "
                    "downloading this map at the same time?"
                )
                return

        # Write out the checksum.
        with open(self.sha256_file, "w") as f:
            print(self.sha256, file=f)
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
import msprime
|
|
2
|
+
import stdvoidsim
|
|
3
|
+
|
|
4
|
+
# Catalog species to which the demographic models in this module are added.
_species = stdvoidsim.get_species("AzaPri")

# Population objects referenced by the demographic models defined below.
_nucleus_pop = stdvoidsim.Population(
    id="Nucleus",
    description="The nuclear chaos at the center of infinity",
)
_fragment_pop = stdvoidsim.Population(
    id="Fragment",
    description="Fragmentary emanations cast into the void",
)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _nuclear_pulsation():
    """
    Build the single-population "NuclearPulsation_1S22" demographic model.

    The population has size 1 at present, size 100 from time 10 backwards
    and size 1 again from time 100 backwards.
    """
    model_id = "NuclearPulsation_1S22"
    summary = "Pulsating single population model of Azathoth"
    details = """
        A single population model representing the pulsating nuclear chaos
        of Azathoth at the center of ultimate chaos. The entity periodically
        fragments and reconverges. Three epochs: current singularity (N=1),
        brief fragmentation 10 generations ago (N=100), ancient unified
        chaos 100 generations ago (N=1).
    """
    demes = [_nucleus_pop]
    sources = [
        stdvoidsim.Citation(
            author="The Daemon Sultan Cult",
            year=1928,
            doi="https://en.wikipedia.org/wiki/Necronomicon",
            reasons={stdvoidsim.CiteReason.DEM_MODEL},
        )
    ]

    present_size = 1
    # (time of change, population size from that time backwards), listed
    # from the most recent change to the most ancient one.
    size_changes = [(10, 100), (100, 1)]

    return stdvoidsim.DemographicModel(
        id=model_id,
        description=summary,
        long_description=details,
        populations=demes,
        citations=sources,
        generation_time=_species.generation_time,
        mutation_rate=1e-11,
        population_configurations=[
            msprime.PopulationConfiguration(
                initial_size=present_size, metadata=demes[0].asdict()
            )
        ],
        demographic_events=[
            msprime.PopulationParametersChange(
                time=when, initial_size=size, population_id=0
            )
            for when, size in size_changes
        ],
    )


_species.add_demographic_model(_nuclear_pulsation())
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _void_emanation():
    """
    Build the two-population "VoidEmanation_2S22" demographic model.

    Present-day sizes are 1 (Nucleus, index 0) and 50 (Fragment, index 1).
    At time 50 a mass migration with proportion 1.0 goes from population 1
    to population 0, and population 0 takes size 100 from that time on.
    """
    model_id = "VoidEmanation_2S22"
    summary = "Two population model of Azathoth nucleus and void fragments"
    details = """
        Two population model: the central nuclear chaos and fragmentary
        emanations cast into the void. Ancestral unified chaos N=100.
        Split at 50 generations ago. Nucleus contracts to N=1.
        Fragments expand to N=50.
    """
    demes = [_nucleus_pop, _fragment_pop]
    sources = [
        stdvoidsim.Citation(
            author="The Daemon Sultan Cult",
            year=1928,
            doi="https://en.wikipedia.org/wiki/Necronomicon",
            reasons={stdvoidsim.CiteReason.DEM_MODEL},
        )
    ]

    ancestral_size = 100
    split_time = 50
    # Present-day sizes, in the same order as ``demes``.
    present_sizes = (1, 50)

    return stdvoidsim.DemographicModel(
        id=model_id,
        description=summary,
        long_description=details,
        populations=demes,
        citations=sources,
        generation_time=_species.generation_time,
        mutation_rate=1e-11,
        population_configurations=[
            msprime.PopulationConfiguration(
                initial_size=size, metadata=deme.asdict()
            )
            for size, deme in zip(present_sizes, demes)
        ],
        demographic_events=[
            # The merge is listed before the size change at the same time.
            msprime.MassMigration(
                time=split_time, source=1, destination=0, proportion=1.0
            ),
            msprime.PopulationParametersChange(
                time=split_time, initial_size=ancestral_size, population_id=0
            ),
        ],
    )


_species.add_demographic_model(_void_emanation())
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# Genome assembly metadata: accession, name, source and build version of the
# assembly, plus the length and synonyms of each chromosome.
data = dict(
    assembly_accession="GCA_VOID_000011",
    assembly_name="NUCLEUS1.0",
    chromosomes=dict(
        I=dict(length=2_000_000_000, synonyms=[]),
        nuclear_chaos_element=dict(length=50_000, synonyms=[]),
    ),
    assembly_source="void_archives",
    assembly_build_version="1",
)
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import stdvoidsim
|
|
2
|
+
from . import genome_data
|
|
3
|
+
|
|
4
|
+
# Citation given for generation time and population size.
_citation = stdvoidsim.Citation(
    doi="https://en.wikipedia.org/wiki/Necronomicon",
    year=1928,
    author="The Daemon Sultan Cult",
    reasons={stdvoidsim.CiteReason.GEN_TIME, stdvoidsim.CiteReason.POP_SIZE},
)

# Citation given for the genome assembly.
_assembly_citation = stdvoidsim.Citation(
    doi="https://en.wikipedia.org/wiki/Necronomicon",
    year=1928,
    author="The Daemon Sultan Cult",
    reasons={stdvoidsim.CiteReason.ASSEMBLY},
)

# Citation given for mutation and recombination rates.
_mutation_citation = stdvoidsim.Citation(
    doi="https://en.wikipedia.org/wiki/Necronomicon",
    year=1928,
    author="The Daemon Sultan Cult",
    reasons={stdvoidsim.CiteReason.MUT_RATE, stdvoidsim.CiteReason.REC_RATE},
)
|
|
24
|
+
|
|
25
|
+
# Recombination rate for each chromosome id.
_recombination_rate = dict(I=1e-12, nuclear_chaos_element=0)

# Mutation rate for each chromosome id.
_mutation_rate = dict(I=1e-11, nuclear_chaos_element=1e-6)

# Ploidy for each chromosome id: chromosome "I" uses the species-wide value,
# while the nuclear chaos element has ploidy 1.
_species_ploidy = 2
_ploidy = dict(I=_species_ploidy, nuclear_chaos_element=1)
|
|
40
|
+
|
|
41
|
+
# Genome built from the assembly data and the per-chromosome tables.
_genome = stdvoidsim.Genome.from_data(
    genome_data.data,
    ploidy=_ploidy,
    mutation_rate=_mutation_rate,
    recombination_rate=_recombination_rate,
    citations=[_mutation_citation, _assembly_citation],
)

# Species definition for "AzaPri".
_species = stdvoidsim.Species(
    id="AzaPri",
    name="Azathoth primordia",
    common_name="Blind Idiot God",
    ensembl_id="azathoth_primordia",
    genome=_genome,
    ploidy=_species_ploidy,
    separate_sexes=False,
    generation_time=1000000,
    population_size=1,
    citations=[_citation],
)

# Add the species to the catalog when this module is imported.
stdvoidsim.register_species(_species)
|