PyPI - toil - Versions diffs - 5.12.0__py3-none-any.whl → 6.1.0__py3-none-any.whl - Mend

toil-5.12.0-py3-none-any.whl → toil-6.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (164)

toil/__init__.py +18 -13
toil/batchSystems/abstractBatchSystem.py +39 -13
toil/batchSystems/abstractGridEngineBatchSystem.py +24 -24
toil/batchSystems/awsBatch.py +14 -14
toil/batchSystems/cleanup_support.py +7 -3
toil/batchSystems/contained_executor.py +3 -3
toil/batchSystems/htcondor.py +0 -1
toil/batchSystems/kubernetes.py +34 -31
toil/batchSystems/local_support.py +3 -1
toil/batchSystems/lsf.py +7 -7
toil/batchSystems/mesos/batchSystem.py +7 -7
toil/batchSystems/options.py +32 -83
toil/batchSystems/registry.py +104 -23
toil/batchSystems/singleMachine.py +16 -13
toil/batchSystems/slurm.py +87 -16
toil/batchSystems/torque.py +0 -1
toil/bus.py +44 -8
toil/common.py +544 -753
toil/cwl/__init__.py +28 -32
toil/cwl/cwltoil.py +595 -574
toil/cwl/utils.py +55 -10
toil/exceptions.py +1 -1
toil/fileStores/__init__.py +2 -2
toil/fileStores/abstractFileStore.py +88 -14
toil/fileStores/cachingFileStore.py +610 -549
toil/fileStores/nonCachingFileStore.py +46 -22
toil/job.py +182 -101
toil/jobStores/abstractJobStore.py +161 -95
toil/jobStores/aws/jobStore.py +23 -9
toil/jobStores/aws/utils.py +6 -6
toil/jobStores/fileJobStore.py +116 -18
toil/jobStores/googleJobStore.py +16 -7
toil/jobStores/utils.py +5 -6
toil/leader.py +87 -56
toil/lib/accelerators.py +10 -5
toil/lib/aws/__init__.py +3 -14
toil/lib/aws/ami.py +22 -9
toil/lib/aws/iam.py +21 -13
toil/lib/aws/session.py +2 -16
toil/lib/aws/utils.py +4 -5
toil/lib/compatibility.py +1 -1
toil/lib/conversions.py +26 -3
toil/lib/docker.py +22 -23
toil/lib/ec2.py +10 -6
toil/lib/ec2nodes.py +106 -100
toil/lib/encryption/_nacl.py +2 -1
toil/lib/generatedEC2Lists.py +325 -18
toil/lib/io.py +49 -2
toil/lib/misc.py +1 -1
toil/lib/resources.py +9 -2
toil/lib/threading.py +101 -38
toil/options/common.py +736 -0
toil/options/cwl.py +336 -0
toil/options/wdl.py +37 -0
toil/provisioners/abstractProvisioner.py +9 -4
toil/provisioners/aws/__init__.py +3 -6
toil/provisioners/aws/awsProvisioner.py +6 -0
toil/provisioners/clusterScaler.py +3 -2
toil/provisioners/gceProvisioner.py +2 -2
toil/realtimeLogger.py +2 -1
toil/resource.py +24 -18
toil/server/app.py +2 -3
toil/server/cli/wes_cwl_runner.py +4 -4
toil/server/utils.py +1 -1
toil/server/wes/abstract_backend.py +3 -2
toil/server/wes/amazon_wes_utils.py +5 -4
toil/server/wes/tasks.py +2 -3
toil/server/wes/toil_backend.py +2 -10
toil/server/wsgi_app.py +2 -0
toil/serviceManager.py +12 -10
toil/statsAndLogging.py +41 -9
toil/test/__init__.py +29 -54
toil/test/batchSystems/batchSystemTest.py +11 -111
toil/test/batchSystems/test_slurm.py +24 -8
toil/test/cactus/__init__.py +0 -0
toil/test/cactus/test_cactus_integration.py +58 -0
toil/test/cwl/cwlTest.py +438 -223
toil/test/cwl/glob_dir.cwl +15 -0
toil/test/cwl/preemptible.cwl +21 -0
toil/test/cwl/preemptible_expression.cwl +28 -0
toil/test/cwl/revsort.cwl +1 -1
toil/test/cwl/revsort2.cwl +1 -1
toil/test/docs/scriptsTest.py +2 -3
toil/test/jobStores/jobStoreTest.py +34 -21
toil/test/lib/aws/test_iam.py +4 -14
toil/test/lib/aws/test_utils.py +0 -3
toil/test/lib/dockerTest.py +4 -4
toil/test/lib/test_ec2.py +12 -17
toil/test/mesos/helloWorld.py +4 -5
toil/test/mesos/stress.py +1 -1
toil/test/{wdl/conftest.py → options/__init__.py} +0 -10
toil/test/options/options.py +37 -0
toil/test/provisioners/aws/awsProvisionerTest.py +9 -5
toil/test/provisioners/clusterScalerTest.py +6 -4
toil/test/provisioners/clusterTest.py +23 -11
toil/test/provisioners/gceProvisionerTest.py +0 -6
toil/test/provisioners/restartScript.py +3 -2
toil/test/server/serverTest.py +1 -1
toil/test/sort/restart_sort.py +2 -1
toil/test/sort/sort.py +2 -1
toil/test/sort/sortTest.py +2 -13
toil/test/src/autoDeploymentTest.py +45 -45
toil/test/src/busTest.py +5 -5
toil/test/src/checkpointTest.py +2 -2
toil/test/src/deferredFunctionTest.py +1 -1
toil/test/src/fileStoreTest.py +32 -16
toil/test/src/helloWorldTest.py +1 -1
toil/test/src/importExportFileTest.py +1 -1
toil/test/src/jobDescriptionTest.py +2 -1
toil/test/src/jobServiceTest.py +1 -1
toil/test/src/jobTest.py +18 -18
toil/test/src/miscTests.py +5 -3
toil/test/src/promisedRequirementTest.py +3 -3
toil/test/src/realtimeLoggerTest.py +1 -1
toil/test/src/resourceTest.py +2 -2
toil/test/src/restartDAGTest.py +1 -1
toil/test/src/resumabilityTest.py +36 -2
toil/test/src/retainTempDirTest.py +1 -1
toil/test/src/systemTest.py +2 -2
toil/test/src/toilContextManagerTest.py +2 -2
toil/test/src/userDefinedJobArgTypeTest.py +1 -1
toil/test/utils/toilDebugTest.py +98 -32
toil/test/utils/toilKillTest.py +2 -2
toil/test/utils/utilsTest.py +23 -3
toil/test/wdl/wdltoil_test.py +223 -45
toil/toilState.py +7 -6
toil/utils/toilClean.py +1 -1
toil/utils/toilConfig.py +36 -0
toil/utils/toilDebugFile.py +60 -33
toil/utils/toilDebugJob.py +39 -12
toil/utils/toilDestroyCluster.py +1 -1
toil/utils/toilKill.py +1 -1
toil/utils/toilLaunchCluster.py +13 -2
toil/utils/toilMain.py +3 -2
toil/utils/toilRsyncCluster.py +1 -1
toil/utils/toilSshCluster.py +1 -1
toil/utils/toilStats.py +445 -305
toil/utils/toilStatus.py +2 -5
toil/version.py +10 -10
toil/wdl/utils.py +2 -122
toil/wdl/wdltoil.py +1257 -492
toil/worker.py +55 -46
toil-6.1.0.dist-info/METADATA +124 -0
toil-6.1.0.dist-info/RECORD +241 -0
{toil-5.12.0.dist-info → toil-6.1.0.dist-info}/WHEEL +1 -1
{toil-5.12.0.dist-info → toil-6.1.0.dist-info}/entry_points.txt +0 -1
toil/batchSystems/parasol.py +0 -379
toil/batchSystems/tes.py +0 -459
toil/test/batchSystems/parasolTestSupport.py +0 -117
toil/test/wdl/builtinTest.py +0 -506
toil/test/wdl/toilwdlTest.py +0 -522
toil/wdl/toilwdl.py +0 -141
toil/wdl/versions/dev.py +0 -107
toil/wdl/versions/draft2.py +0 -980
toil/wdl/versions/v1.py +0 -794
toil/wdl/wdl_analysis.py +0 -116
toil/wdl/wdl_functions.py +0 -997
toil/wdl/wdl_synthesis.py +0 -1011
toil/wdl/wdl_types.py +0 -243
toil-5.12.0.dist-info/METADATA +0 -118
toil-5.12.0.dist-info/RECORD +0 -244
toil/{wdl/versions → options}/__init__.py +0 -0
{toil-5.12.0.dist-info → toil-6.1.0.dist-info}/LICENSE +0 -0
{toil-5.12.0.dist-info → toil-6.1.0.dist-info}/top_level.txt +0 -0

toil/test/wdl/toilwdlTest.py DELETED Viewed

@@ -1,522 +0,0 @@
-import os
-import shutil
-import subprocess
-import tempfile
-from typing import List
-import unittest
-import uuid
-import zipfile
-from urllib.request import urlretrieve
-from toil.test import ToilTest, needs_docker, needs_java, slow
-from toil.version import exactPython
-from toil.wdl.utils import get_analyzer
-from toil.wdl.wdl_functions import (basename,
-                                    glob,
-                                    parse_cores,
-                                    parse_disk,
-                                    parse_memory,
-                                    process_infile,
-                                    read_csv,
-                                    read_tsv,
-                                    select_first,
-                                    size)
-class BaseToilWdlTest(ToilTest):
-    """Base test class for WDL tests"""
-    def setUp(self) -> None:
-        """Runs anew before each test to create farm fresh temp dirs."""
-        self.output_dir = os.path.join('/tmp/', 'toil-wdl-test-' + str(uuid.uuid4()))
-        os.makedirs(self.output_dir)
-    def tearDown(self) -> None:
-        if os.path.exists(self.output_dir):
-            shutil.rmtree(self.output_dir)
-    @classmethod
-    def setUpClass(cls) -> None:
-        """Runs once for all tests."""
-        super(BaseToilWdlTest, cls).setUpClass()
-        cls.base_command = [exactPython, os.path.abspath("src/toil/wdl/toilwdl.py")]
-class ToilWdlTest(BaseToilWdlTest):
-    """
-    General tests for Toil WDL
-    """
-    @needs_docker
-    def testMD5sum(self):
-        """Test if toilwdl produces the same outputs as known good outputs for WDL's
-        GATK tutorial #1."""
-        wdl = os.path.abspath('src/toil/test/wdl/md5sum/md5sum.wdl')
-        inputfile = os.path.abspath('src/toil/test/wdl/md5sum/md5sum.input')
-        json = os.path.abspath('src/toil/test/wdl/md5sum/md5sum.json')
-        subprocess.check_call(self.base_command + [wdl, json, '-o', self.output_dir, '--logDebug'])
-        md5sum_output = os.path.join(self.output_dir, 'md5sum.txt')
-        assert os.path.exists(md5sum_output)
-        os.unlink(md5sum_output)
-class ToilWDLLibraryTest(BaseToilWdlTest):
-    """
-    Test class for WDL standard functions.
-    """
-    # estimated run time <1 sec
-    def testFn_SelectFirst(self):
-        """Test the wdl built-in functional equivalent of 'select_first()',
-        which returns the first value in a list that is not None."""
-        assert select_first(['somestring', 'anotherstring', None, '', 1]) == 'somestring'
-        assert select_first([None, '', 1, 'somestring']) == 1
-        assert select_first([2, 1, '', 'somestring', None, '']) == 2
-        assert select_first(['', 2, 1, 'somestring', None, '']) == 2
-    # estimated run time <1 sec
-    def testFn_Size(self) -> None:
-        """Test the wdl built-in functional equivalent of 'size()',
-        which returns a file's size based on the path."""
-        from toil.common import Toil
-        from toil.job import Job
-        from toil.wdl.wdl_types import WDLFile
-        options = Job.Runner.getDefaultOptions(self._getTestJobStorePath())
-        options.clean = 'always'
-        with Toil(options) as toil:
-            small = process_infile(WDLFile(file_path=os.path.abspath('src/toil/test/wdl/testfiles/vocab.wdl')), toil)
-            small_file = size(small)
-            assert small_file >= 1800, small_file
-    # estimated run time <1 sec
-    def testFn_Basename(self):
-        assert basename('/home/quokka/git/delete/toil/src/toil/wdl/toilwdl.py', '.py') == 'toilwdl'
-        assert basename('/home/quokka/git/delete/toil/src/toil/wdl/toilwdl.py') == 'toilwdl.py'
-        assert basename('toilwdl.py', '.py') == 'toilwdl'
-        assert basename('toilwdl.py') == 'toilwdl.py'
-    # estimated run time <1 sec
-    def testFn_Glob(self):
-        """Test the wdl built-in functional equivalent of 'glob()',
-        which finds all files with a pattern in a directory."""
-        vocab_location = glob('vocab.wdl', os.path.abspath('src/toil'))
-        assert vocab_location == [os.path.abspath('src/toil/test/wdl/testfiles/vocab.wdl')], str(vocab_location)
-        wdl_locations = glob('wdl_*.py', os.path.abspath('src/toil'))
-        wdl_that_should_exist = [os.path.abspath('src/toil/wdl/wdl_analysis.py'),
-                                 os.path.abspath('src/toil/wdl/wdl_synthesis.py'),
-                                 os.path.abspath('src/toil/wdl/wdl_types.py'),
-                                 os.path.abspath('src/toil/wdl/wdl_functions.py')]
-        # make sure the files match the expected files
-        for location in wdl_that_should_exist:
-            assert location in wdl_locations, f'{str(location)} not in {str(wdl_locations)}!'
-        # make sure the same number of files were found as expected
-        assert len(wdl_that_should_exist) == len(wdl_locations), f'{str(len(wdl_locations))} != {str(len(wdl_that_should_exist))}'
-    # estimated run time <1 sec
-    def testFn_ParseMemory(self):
-        """Test the wdl built-in functional equivalent of 'parse_memory()',
-        which parses a specified memory input to an int output.
-        The input can be a string or an int or a float and may include units
-        such as 'Gb' or 'mib' as a separate argument."""
-        assert parse_memory(2147483648) == 2147483648, str(parse_memory(2147483648))
-        assert parse_memory('2147483648') == 2147483648, str(parse_memory(2147483648))
-        assert parse_memory('2GB') == 2000000000, str(parse_memory('2GB'))
-        assert parse_memory('2GiB') == 2147483648, str(parse_memory('2GiB'))
-        assert parse_memory('1 GB') == 1000000000, str(parse_memory('1 GB'))
-        assert parse_memory('1 GiB') == 1073741824, str(parse_memory('1 GiB'))
-    # estimated run time <1 sec
-    def testFn_ParseCores(self):
-        """Test the wdl built-in functional equivalent of 'parse_cores()',
-        which parses a specified disk input to an int output.
-        The input can be a string or an int."""
-        assert parse_cores(1) == 1
-        assert parse_cores('1') == 1
-    # estimated run time <1 sec
-    def testFn_ParseDisk(self):
-        """Test the wdl built-in functional equivalent of 'parse_disk()',
-        which parses a specified disk input to an int output.
-        The input can be a string or an int or a float and may include units
-        such as 'Gb' or 'mib' as a separate argument.
-        The minimum returned value is 2147483648 bytes."""
-        # check minimum returned value
-        assert parse_disk('1') == 2147483648, str(parse_disk('1'))
-        assert parse_disk(1) == 2147483648, str(parse_disk(1))
-        assert parse_disk(2200000001) == 2200000001, str(parse_disk(2200000001))
-        assert parse_disk('2200000001') == 2200000001, str(parse_disk('2200000001'))
-        assert parse_disk('/mnt/my_mnt 3 SSD, /mnt/my_mnt2 500 HDD') == 503000000000, str(parse_disk('/mnt/my_mnt 3 SSD, /mnt/my_mnt2 500 HDD'))
-        assert parse_disk('local-disk 10 SSD') == 10000000000, str(parse_disk('local-disk 10 SSD'))
-        assert parse_disk('/mnt/ 10 HDD') == 10000000000, str(parse_disk('/mnt/ 10 HDD'))
-        assert parse_disk('/mnt/ 1000 HDD') == 1000000000000, str(parse_disk('/mnt/ 1000 HDD'))
-    # estimated run time <1 sec
-    def testPrimitives(self):
-        """Test if toilwdl correctly interprets some basic declarations."""
-        wdl = os.path.abspath('src/toil/test/wdl/testfiles/vocab.wdl')
-        # TODO: test for all version.
-        aWDL = get_analyzer(wdl)
-        aWDL.analyze()
-        no_declaration = ['bool1', 'int1', 'float1', 'file1', 'string1']
-        collection_counter = []
-        for key, declaration in aWDL.workflows_dictionary['vocabulary'].items():
-            if not key.startswith('declaration'):
-                continue
-            name, var_type, var_expr = declaration
-            if name in no_declaration:
-                collection_counter.append(name)
-                assert not var_expr
-            if name == 'bool2':
-                collection_counter.append(name)
-                assert var_expr == 'True', var_expr
-                assert var_type == 'Boolean', var_type
-            if name == 'int2':
-                collection_counter.append(name)
-                assert var_expr == '1', var_expr
-                assert var_type == 'Int', var_type
-            if name == 'float2':
-                collection_counter.append(name)
-                assert var_expr == '1.1', var_expr
-                assert var_type == 'Float', var_type
-            if name == 'file2':
-                collection_counter.append(name)
-                assert var_expr == "'src/toil/test/wdl/test.tsv'", var_expr
-                assert var_type == 'File', var_type
-            if name == 'string2':
-                collection_counter.append(name)
-                assert var_expr == "'x'", var_expr
-                assert var_type == 'String', var_type
-        assert collection_counter == ['bool1', 'int1', 'float1', 'file1', 'string1',
-                                      'bool2', 'int2', 'float2', 'file2', 'string2']
-    # estimated run time <1 sec
-    def testCSV(self):
-        default_csv_output = [['1', '2', '3'],
-                              ['4', '5', '6'],
-                              ['7', '8', '9']]
-        csv_array = read_csv(os.path.abspath('src/toil/test/wdl/test.csv'))
-        assert csv_array == default_csv_output
-    # estimated run time <1 sec
-    def testTSV(self):
-        default_tsv_output = [['1', '2', '3'],
-                              ['4', '5', '6'],
-                              ['7', '8', '9']]
-        tsv_array = read_tsv(os.path.abspath('src/toil/test/wdl/test.tsv'))
-        assert tsv_array == default_tsv_output
-class ToilWdlIntegrationTest(BaseToilWdlTest):
-    """Test class for WDL tests that need extra workflows and data downloaded"""
-    gatk_data: str
-    gatk_data_dir: str
-    encode_data: str
-    encode_data_dir: str
-    wdl_data: str
-    wdl_data_dir: str
-    @classmethod
-    def setUpClass(cls) -> None:
-        """Runs once for all tests."""
-        super(ToilWdlIntegrationTest, cls).setUpClass()
-        cls.test_directory = os.path.abspath("src/toil/test/wdl/")
-        cls.encode_data = os.path.join(cls.test_directory, "ENCODE_data.zip")
-        cls.encode_data_dir = os.path.join(cls.test_directory, "ENCODE_data")
-        cls.wdl_data = os.path.join(cls.test_directory, "wdl_templates.zip")
-        cls.wdl_data_dir = os.path.join(cls.test_directory, "wdl_templates")
-        cls.gatk_data = os.path.join(cls.test_directory, "GATK_data.zip")
-        cls.gatk_data_dir = os.path.join(cls.test_directory, "GATK_data")
-        cls.fetch_and_unzip_from_s3(filename='ENCODE_data.zip',
-                                    data=cls.encode_data,
-                                    data_dir=cls.encode_data_dir)
-        cls.fetch_and_unzip_from_s3(filename='wdl_templates.zip',
-                                    data=cls.wdl_data,
-                                    data_dir=cls.wdl_data_dir)
-        cls.fetch_and_unzip_from_s3(filename='GATK_data.zip',
-                                    data=cls.gatk_data,
-                                    data_dir=cls.gatk_data_dir)
-    @classmethod
-    def tearDownClass(cls) -> None:
-        """We generate a lot of cruft."""
-        data_dirs = [cls.gatk_data_dir, cls.wdl_data_dir, cls.encode_data_dir]
-        data_zips = [cls.gatk_data, cls.wdl_data, cls.encode_data]
-        encode_outputs = ['ENCFF000VOL_chr21.fq.gz',
-                          'ENCFF000VOL_chr21.raw.srt.bam',
-                          'ENCFF000VOL_chr21.raw.srt.bam.flagstat.qc',
-                          'ENCFF000VOL_chr21.raw.srt.dup.qc',
-                          'ENCFF000VOL_chr21.raw.srt.filt.nodup.srt.final.bam',
-                          'ENCFF000VOL_chr21.raw.srt.filt.nodup.srt.final.bam.bai',
-                          'ENCFF000VOL_chr21.raw.srt.filt.nodup.srt.final.filt.nodup.sample.15.SE.tagAlign.gz',
-                          'ENCFF000VOL_chr21.raw.srt.filt.nodup.srt.final.filt.nodup.sample.15.SE.tagAlign.gz.cc.plot.pdf',
-                          'ENCFF000VOL_chr21.raw.srt.filt.nodup.srt.final.filt.nodup.sample.15.SE.tagAlign.gz.cc.qc',
-                          'ENCFF000VOL_chr21.raw.srt.filt.nodup.srt.final.flagstat.qc',
-                          'ENCFF000VOL_chr21.raw.srt.filt.nodup.srt.final.pbc.qc',
-                          'ENCFF000VOL_chr21.raw.srt.filt.nodup.srt.final.SE.tagAlign.gz',
-                          'ENCFF000VOL_chr21.sai',
-                          'test.txt',
-                          'filter_qc.json',
-                          'filter_qc.log',
-                          'GRCh38_chr21_bwa.tar.gz',
-                          'mapping.json',
-                          'mapping.log',
-                          'post_mapping.json',
-                          'post_mapping.log',
-                          'wdl-stats.log',
-                          'xcor.json',
-                          'xcor.log',
-                          'toilwdl_compiled.pyc',
-                          'toilwdl_compiled.py',
-                          'post_processing.log',
-                          'md5.log']
-        for cleanup in data_dirs + data_zips + encode_outputs:
-            if os.path.isdir(cleanup):
-                shutil.rmtree(cleanup)
-            elif os.path.exists(cleanup):
-                os.remove(cleanup)
-        super(ToilWdlIntegrationTest, cls).tearDownClass()
-    # estimated run time 27 sec
-    @slow
-    @needs_java
-    def testTut01(self):
-        """Test if toilwdl produces the same outputs as known good outputs for WDL's
-        GATK tutorial #1."""
-        wdl = os.path.abspath("src/toil/test/wdl/wdl_templates/t01/helloHaplotypeCaller.wdl")
-        json = os.path.abspath("src/toil/test/wdl/wdl_templates/t01/helloHaplotypeCaller_inputs.json")
-        ref_dir = os.path.abspath("src/toil/test/wdl/wdl_templates/t01/output/")
-        subprocess.check_call(self.base_command + [wdl, json, '-o', self.output_dir])
-        compare_runs(self.output_dir, ref_dir)
-    # estimated run time 28 sec
-    @slow
-    @needs_java
-    def testTut02(self):
-        """Test if toilwdl produces the same outputs as known good outputs for WDL's
-        GATK tutorial #2."""
-        wdl = os.path.abspath("src/toil/test/wdl/wdl_templates/t02/simpleVariantSelection.wdl")
-        json = os.path.abspath("src/toil/test/wdl/wdl_templates/t02/simpleVariantSelection_inputs.json")
-        ref_dir = os.path.abspath("src/toil/test/wdl/wdl_templates/t02/output/")
-        subprocess.check_call(self.base_command + [wdl, json, '-o', self.output_dir])
-        compare_runs(self.output_dir, ref_dir)
-    # estimated run time 60 sec
-    @slow
-    @needs_java
-    def testTut03(self):
-        """Test if toilwdl produces the same outputs as known good outputs for WDL's
-        GATK tutorial #3."""
-        wdl = os.path.abspath("src/toil/test/wdl/wdl_templates/t03/simpleVariantDiscovery.wdl")
-        json = os.path.abspath("src/toil/test/wdl/wdl_templates/t03/simpleVariantDiscovery_inputs.json")
-        ref_dir = os.path.abspath("src/toil/test/wdl/wdl_templates/t03/output/")
-        subprocess.check_call(self.base_command + [wdl, json, '-o', self.output_dir])
-        compare_runs(self.output_dir, ref_dir)
-    # estimated run time 175 sec
-    @slow
-    @needs_java
-    @unittest.skip('broken; see: https://github.com/DataBiosphere/toil/issues/3339')
-    def testTut04(self):
-        """Test if toilwdl produces the same outputs as known good outputs for WDL's
-        GATK tutorial #4."""
-        wdl = os.path.abspath("src/toil/test/wdl/wdl_templates/t04/jointCallingGenotypes.wdl")
-        json = os.path.abspath("src/toil/test/wdl/wdl_templates/t04/jointCallingGenotypes_inputs.json")
-        ref_dir = os.path.abspath("src/toil/test/wdl/wdl_templates/t04/output/")
-        subprocess.check_call(self.base_command + [wdl, json, '-o', self.output_dir])
-        compare_runs(self.output_dir, ref_dir)
-    # estimated run time 80 sec
-    @slow
-    @needs_docker
-    def testENCODE(self):
-        """Test if toilwdl produces the same outputs as known good outputs for
-        a short ENCODE run."""
-        wdl = os.path.abspath(
-            "src/toil/test/wdl/wdl_templates/testENCODE/encode_mapping_workflow.wdl")
-        json = os.path.abspath(
-            "src/toil/test/wdl/wdl_templates/testENCODE/encode_mapping_workflow.wdl.json")
-        ref_dir = os.path.abspath(
-            "src/toil/test/wdl/wdl_templates/testENCODE/output/")
-        subprocess.check_call(
-            self.base_command + [wdl, json, '--docker_user=None', '--out_dir', self.output_dir])
-        compare_runs(self.output_dir, ref_dir)
-    # estimated run time 2 sec
-    def testPipe(self):
-        """Test basic bash input functionality with a pipe."""
-        wdl = os.path.abspath(
-            "src/toil/test/wdl/wdl_templates/testPipe/call.wdl")
-        json = os.path.abspath(
-            "src/toil/test/wdl/wdl_templates/testPipe/call.json")
-        ref_dir = os.path.abspath(
-            "src/toil/test/wdl/wdl_templates/testPipe/output/")
-        subprocess.check_call(
-            self.base_command + [wdl, json, '--out_dir', self.output_dir])
-        compare_runs(self.output_dir, ref_dir)
-    # estimated run time <1 sec
-    def testJSON(self):
-        default_json_dict_output = {
-            'helloHaplotypeCaller.haplotypeCaller.RefIndex': '"src/toil/test/wdl/GATK_data/ref/human_g1k_b37_20.fasta.fai"',
-            'helloHaplotypeCaller.haplotypeCaller.sampleName': '"WDL_tut1_output"',
-            'helloHaplotypeCaller.haplotypeCaller.inputBAM': '"src/toil/test/wdl/GATK_data/inputs/NA12878_wgs_20.bam"',
-            'helloHaplotypeCaller.haplotypeCaller.bamIndex': '"src/toil/test/wdl/GATK_data/inputs/NA12878_wgs_20.bai"',
-            'helloHaplotypeCaller.haplotypeCaller.GATK': '"src/toil/test/wdl/GATK_data/gatk-package-4.1.9.0-local.jar"',
-            'helloHaplotypeCaller.haplotypeCaller.RefDict': '"src/toil/test/wdl/GATK_data/ref/human_g1k_b37_20.dict"',
-            'helloHaplotypeCaller.haplotypeCaller.RefFasta': '"src/toil/test/wdl/GATK_data/ref/human_g1k_b37_20.fasta"'}
-        from toil.wdl.utils import dict_from_JSON
-        json_dict = dict_from_JSON("src/toil/test/wdl/wdl_templates/t01/helloHaplotypeCaller_inputs.json")
-        assert json_dict == default_json_dict_output, (
-                str(json_dict) + '\nAssertionError: ' + str(default_json_dict_output))
-    # estimated run time <1 sec
-    def test_size_large(self) -> None:
-        """Test the wdl built-in functional equivalent of 'size()',
-        which returns a file's size based on the path, on a large file."""
-        from toil.common import Toil
-        from toil.job import Job
-        from toil.wdl.wdl_types import WDLFile
-        options = Job.Runner.getDefaultOptions(self._getTestJobStorePath())
-        options.clean = 'always'
-        with Toil(options) as toil:
-            large = process_infile(WDLFile(file_path=self.encode_data), toil)
-            larger_file = size(large)
-            larger_file_in_mb = size(large, 'mb')
-            assert larger_file >= 70000000, larger_file
-            assert larger_file_in_mb >= 70, larger_file_in_mb
-    @classmethod
-    def fetch_and_unzip_from_s3(cls, filename, data, data_dir):
-        if not os.path.exists(data):
-            s3_loc = os.path.join('http://toil-datasets.s3.amazonaws.com/', filename)
-            urlretrieve(s3_loc, data)
-        # extract the compressed data if not already extracted
-        if not os.path.exists(data_dir):
-            with zipfile.ZipFile(data, 'r') as zip_ref:
-                zip_ref.extractall(cls.test_directory)
-def compare_runs(output_dir, ref_dir):
-    """
-    Takes two directories and compares all of the files between those two
-    directories, asserting that they match.
-    - Ignores outputs.txt, which contains a list of the outputs in the folder.
-    - Compares line by line, unless the file is a .vcf file.
-    - Ignores potentially date-stamped comments (lines starting with '#').
-    - Ignores quality scores in .vcf files and only checks that they found
-      the same variants.  This is due to assumed small observed rounding
-      differences between systems.
-    :param ref_dir: The first directory to compare (with output_dir).
-    :param output_dir: The second directory to compare (with ref_dir).
-    """
-    reference_output_files = os.listdir(ref_dir)
-    for file in reference_output_files:
-        if file not in ('outputs.txt', '__pycache__'):
-            test_output_files = os.listdir(output_dir)
-            filepath = os.path.join(ref_dir, file)
-            with open(filepath) as default_file:
-                good_data = []
-                for line in default_file:
-                    if not line.startswith('#'):
-                        good_data.append(line)
-                for test_file in test_output_files:
-                    if file == test_file:
-                        test_filepath = os.path.join(output_dir, file)
-                        if file.endswith(".vcf"):
-                            compare_vcf_files(filepath1=filepath,
-                                              filepath2=test_filepath)
-                        else:
-                            with open(test_filepath) as test_file:
-                                test_data = []
-                                for line in test_file:
-                                    if not line.startswith('#'):
-                                        test_data.append(line)
-                            assert good_data == test_data, "File does not match: %r" % file
-def compare_vcf_files(filepath1, filepath2):
-    """
-    Asserts that two .vcf files contain the same variant findings.
-    - Ignores potentially date-stamped comments (lines starting with '#').
-    - Ignores quality scores in .vcf files and only checks that they found
-      the same variants.  This is due to assumed small observed rounding
-      differences between systems.
-    VCF File Column Contents:
-    1: #CHROM
-    2: POS
-    3: ID
-    4: REF
-    5: ALT
-    6: QUAL
-    7: FILTER
-    8: INFO
-    :param filepath1: First .vcf file to compare.
-    :param filepath2: Second .vcf file to compare.
-    """
-    with open(filepath1) as default_file:
-        good_data = []
-        for line in default_file:
-            line = line.strip()
-            if not line.startswith('#'):
-                good_data.append(line.split('\t'))
-    with open(filepath2) as test_file:
-        test_data = []
-        for line in test_file:
-            line = line.strip()
-            if not line.startswith('#'):
-                test_data.append(line.split('\t'))
-    for i in range(len(test_data)):
-        if test_data[i] != good_data[i]:
-            for j in range(len(test_data[i])):
-                # Only compare chromosome, position, ID, reference, and alts.
-                # Quality score may vary (<1%) between systems because of
-                # (assumed) rounding differences.  Same for the "info" sect.
-                if j < 5:
-                    if j == 4:
-                        if test_data[i][j].startswith('*,'):
-                            test_data[i][j] = test_data[i][j][2:]
-                        if good_data[i][j].startswith('*,'):
-                            good_data[i][j] = good_data[i][j][2:]
-                    assert test_data[i][j] == good_data[i][j], f"\nInconsistent VCFs: {filepath1} != {filepath2}\n" \
-                                                               f" - {test_data[i][j]} != {good_data[i][j]}\n" \
-                                                               f" - Line: {i} Column: {j}"
-if __name__ == "__main__":
-    unittest.main()  # run all tests

toil/wdl/toilwdl.py DELETED Viewed

@@ -1,141 +0,0 @@
-# Copyright (C) 2018-2021 UCSC Computational Genomics Lab
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import argparse
-import logging
-import os
-import subprocess
-import sys
-from toil.wdl.utils import dict_from_JSON, get_analyzer, write_mappings
-from toil.wdl.wdl_synthesis import SynthesizeWDL
-logger = logging.getLogger(__name__)
-def main():
-    """
-    A program to run WDL input files using native Toil scripts.
-    Calls two files, described below, wdl_analysis.py and wdl_synthesis.py:
-    wdl_analysis reads the wdl and restructures them into 2 intermediate data
-    structures before writing (python dictionaries):
-        "wf_dictionary": containing the parsed workflow information.
-        "tasks_dictionary": containing the parsed task information.
-    wdl_synthesis takes the "wf_dictionary", "tasks_dictionary", and the JSON file
-    and uses them to write a native python script for use with Toil.
-    Requires a WDL file, and a JSON file.  The WDL file contains ordered commands,
-    and the JSON file contains input values for those commands.  To run in Toil,
-    these two files must be parsed, restructured into python dictionaries, and
-    then compiled into a Toil formatted python script.  This compiled Toil script
-    is deleted unless the user specifies: "--dev_mode" as an option.
-    The WDL parser was auto-generated from the Broad's current WDL grammar file:
-    https://github.com/openwdl/wdl/blob/master/parsers/grammar.hgr
-    using Scott Frazer's Hermes: https://github.com/scottfrazer/hermes
-    Thank you Scott Frazer!
-    Currently in alpha testing, and known to work with the Broad's GATK tutorial
-    set for WDL on their main wdl site:
-    software.broadinstitute.org/wdl/documentation/topic?name=wdl-tutorials
-    And ENCODE's WDL workflow:
-    github.com/ENCODE-DCC/pipeline-container/blob/master/local-workflows/encode_mapping_workflow.wdl
-    Additional support to be broadened to include more features soon.
-    """
-    parser = argparse.ArgumentParser(description='Runs WDL files with toil.')
-    parser.add_argument('wdl_file', help='A WDL workflow file.')
-    parser.add_argument('secondary_file', help='A secondary data file (json).')
-    parser.add_argument("--jobStore", type=str, required=False, default=None)
-    parser.add_argument('-o',
-                        '--outdir',
-                        required=False,
-                        default=os.getcwd(),
-                        help='Optionally specify the directory that outputs '
-                             'are written to.  Default is the current working dir.')
-    parser.add_argument('--dev_mode', required=False, default=False,
-                        help='1. Creates "AST.out", which holds the printed AST and '
-                             '"mappings.out", which holds the parsed task, workflow '
-                             'dictionaries that were generated.  '
-                             '2. Saves the compiled toil script generated from the '
-                             'wdl/json files from deletion.  '
-                             '3. Skips autorunning the compiled python file.')
-    parser.add_argument('--docker_user', required=False, default='root',
-                        help='The user permissions that the docker containers will be run '
-                             'with (and the permissions set on any output files produced).  '
-                             'Default is "root".  Setting this to None will set this to '
-                             'the current user.')
-    parser.add_argument("--destBucket", type=str, required=False, default=False,
-                        help="Specify a cloud bucket endpoint for output files.")
-    # wdl_run_args is an array containing all of the unknown arguments not
-    # specified by the parser in this main.  All of these will be passed down in
-    # check_call later to run the compiled toil file.
-    args, wdl_run_args = parser.parse_known_args()
-    wdl_file = os.path.abspath(args.wdl_file)
-    args.secondary_file = os.path.abspath(args.secondary_file)
-    args.outdir = os.path.abspath(args.outdir)
-    aWDL = get_analyzer(wdl_file=wdl_file)
-    if args.dev_mode:
-        aWDL.write_AST(out_dir=args.outdir)
-    # read secondary file; create dictionary to hold variables
-    if args.secondary_file.endswith('.json'):
-        json_dict = dict_from_JSON(args.secondary_file)
-    else:
-        raise RuntimeError('Unsupported Secondary File Type.  Use json.')
-    aWDL.analyze()
-    sWDL = SynthesizeWDL(aWDL.version,
-                         aWDL.tasks_dictionary,
-                         aWDL.workflows_dictionary,
-                         args.outdir,
-                         json_dict,
-                         args.docker_user,
-                         args.jobStore,
-                         args.destBucket)
-    # use the AST dictionaries to write 4 strings
-    # these are the future 4 sections of the compiled toil python file
-    module_section = sWDL.write_modules()
-    fn_section = sWDL.write_functions()
-    main_section = sWDL.write_main()
-    # write 3 strings to a python output file
-    sWDL.write_python_file(module_section,
-                           fn_section,
-                           main_section,
-                           sWDL.output_file)
-    if args.dev_mode:
-        logger.debug('WDL file compiled to toil script.')
-        write_mappings(aWDL)
-    else:
-        logger.debug('WDL file compiled to toil script.  Running now.')
-        exe = sys.executable if sys.executable else 'python'
-        cmd = [exe, sWDL.output_file]
-        cmd.extend(wdl_run_args)
-        subprocess.check_call(cmd)
-        os.remove(sWDL.output_file)
-if __name__ == '__main__':
-    main()

toil 5.12.0__py3-none-any.whl → 6.1.0__py3-none-any.whl

toil 5.12.0__py3-none-any.whl → 6.1.0__py3-none-any.whl