toil 8.2.0__py3-none-any.whl → 9.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/batchSystems/abstractBatchSystem.py +13 -5
- toil/batchSystems/abstractGridEngineBatchSystem.py +17 -5
- toil/batchSystems/kubernetes.py +13 -2
- toil/batchSystems/mesos/batchSystem.py +33 -2
- toil/batchSystems/registry.py +15 -118
- toil/batchSystems/slurm.py +191 -16
- toil/common.py +20 -1
- toil/cwl/cwltoil.py +97 -119
- toil/cwl/utils.py +103 -3
- toil/fileStores/__init__.py +1 -1
- toil/fileStores/abstractFileStore.py +5 -2
- toil/fileStores/cachingFileStore.py +1 -1
- toil/job.py +30 -14
- toil/jobStores/abstractJobStore.py +35 -255
- toil/jobStores/aws/jobStore.py +864 -1964
- toil/jobStores/aws/utils.py +24 -270
- toil/jobStores/fileJobStore.py +2 -1
- toil/jobStores/googleJobStore.py +32 -13
- toil/jobStores/utils.py +0 -327
- toil/leader.py +27 -22
- toil/lib/accelerators.py +1 -1
- toil/lib/aws/config.py +22 -0
- toil/lib/aws/s3.py +477 -9
- toil/lib/aws/utils.py +22 -33
- toil/lib/checksum.py +88 -0
- toil/lib/conversions.py +33 -31
- toil/lib/directory.py +217 -0
- toil/lib/ec2.py +97 -29
- toil/lib/exceptions.py +2 -1
- toil/lib/expando.py +2 -2
- toil/lib/generatedEC2Lists.py +138 -19
- toil/lib/io.py +33 -2
- toil/lib/memoize.py +21 -7
- toil/lib/misc.py +1 -1
- toil/lib/pipes.py +385 -0
- toil/lib/plugins.py +106 -0
- toil/lib/retry.py +1 -1
- toil/lib/threading.py +1 -1
- toil/lib/url.py +320 -0
- toil/lib/web.py +4 -5
- toil/options/cwl.py +13 -1
- toil/options/runner.py +17 -10
- toil/options/wdl.py +12 -1
- toil/provisioners/__init__.py +5 -2
- toil/provisioners/aws/__init__.py +43 -36
- toil/provisioners/aws/awsProvisioner.py +47 -15
- toil/provisioners/node.py +60 -12
- toil/resource.py +3 -13
- toil/server/app.py +12 -6
- toil/server/cli/wes_cwl_runner.py +2 -2
- toil/server/wes/abstract_backend.py +21 -43
- toil/server/wes/toil_backend.py +2 -2
- toil/test/__init__.py +16 -18
- toil/test/batchSystems/batchSystemTest.py +2 -9
- toil/test/batchSystems/batch_system_plugin_test.py +7 -0
- toil/test/batchSystems/test_slurm.py +103 -14
- toil/test/cwl/cwlTest.py +181 -8
- toil/test/cwl/staging_cat.cwl +27 -0
- toil/test/cwl/staging_make_file.cwl +25 -0
- toil/test/cwl/staging_workflow.cwl +43 -0
- toil/test/cwl/zero_default.cwl +61 -0
- toil/test/docs/scripts/tutorial_staging.py +17 -8
- toil/test/docs/scriptsTest.py +2 -1
- toil/test/jobStores/jobStoreTest.py +23 -133
- toil/test/lib/aws/test_iam.py +7 -7
- toil/test/lib/aws/test_s3.py +30 -33
- toil/test/lib/aws/test_utils.py +9 -9
- toil/test/lib/test_url.py +69 -0
- toil/test/lib/url_plugin_test.py +105 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +60 -7
- toil/test/provisioners/clusterTest.py +15 -2
- toil/test/provisioners/gceProvisionerTest.py +1 -1
- toil/test/server/serverTest.py +78 -36
- toil/test/src/autoDeploymentTest.py +2 -3
- toil/test/src/fileStoreTest.py +89 -87
- toil/test/utils/ABCWorkflowDebug/ABC.txt +1 -0
- toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +4 -4
- toil/test/utils/toilKillTest.py +35 -28
- toil/test/wdl/md5sum/md5sum-gs.json +1 -1
- toil/test/wdl/md5sum/md5sum.json +1 -1
- toil/test/wdl/testfiles/read_file.wdl +18 -0
- toil/test/wdl/testfiles/url_to_optional_file.wdl +2 -1
- toil/test/wdl/wdltoil_test.py +171 -162
- toil/test/wdl/wdltoil_test_kubernetes.py +9 -0
- toil/utils/toilDebugFile.py +6 -3
- toil/utils/toilSshCluster.py +23 -0
- toil/utils/toilStats.py +17 -2
- toil/utils/toilUpdateEC2Instances.py +1 -0
- toil/version.py +10 -10
- toil/wdl/wdltoil.py +1179 -825
- toil/worker.py +16 -8
- {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/METADATA +32 -32
- {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/RECORD +97 -85
- {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/WHEEL +1 -1
- toil/lib/iterables.py +0 -112
- toil/test/docs/scripts/stagingExampleFiles/in.txt +0 -1
- {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/entry_points.txt +0 -0
- {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/licenses/LICENSE +0 -0
- {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/top_level.txt +0 -0
toil/test/docs/scripts/tutorial_staging.py
CHANGED
@@ -4,7 +4,6 @@ from toil.common import Toil
 from toil.job import Job
 from toil.lib.io import mkdtemp
 
-
 class HelloWorld(Job):
     def __init__(self, id):
         Job.__init__(self)
@@ -22,6 +21,7 @@ class HelloWorld(Job):
 
 if __name__ == "__main__":
     jobstore: str = mkdtemp("tutorial_staging")
+    tmp: str = mkdtemp("tutorial_staging_tmp")
     os.rmdir(jobstore)
     options = Job.Runner.getDefaultOptions(jobstore)
     options.logLevel = "INFO"
@@ -29,17 +29,26 @@ if __name__ == "__main__":
 
     with Toil(options) as toil:
         if not toil.options.restart:
-
-
-            )
-
-
-
+            # Prepare an input file
+            path = os.path.join(tmp, "in.txt")
+            with open(path, "w") as f:
+                f.write("Hello,\n")
+            # In a real workflow, you would obtain an input file path from the
+            # user.
+
+            # Stage it into the Toil job store.
+            #
+            # Note: this may create a symlink depending on the value of the
+            # --linkImports command line option, in which case the original
+            # input file needs to still exist if the workflow is restarted.
+            inputFileID = toil.importFile(f"file://{path}")
+
+            # Run the workflow
             outputFileID = toil.start(HelloWorld(inputFileID))
         else:
             outputFileID = toil.restart()
 
         toil.exportFile(
             outputFileID,
-            "file://" + os.path.
+            "file://" + os.path.join(tmp, "out.txt"),
         )
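Consolidated from the hunks above, the updated staging tutorial now writes a local input file, imports it into the job store before starting the workflow, and exports the result afterwards. The following is a minimal sketch of that flow rather than the full tutorial script; it assumes the HelloWorld job class defined earlier in tutorial_staging.py.

import os

from toil.common import Toil
from toil.job import Job
from toil.lib.io import mkdtemp

# Assumes the HelloWorld(Job) class from the tutorial is defined above this point.

if __name__ == "__main__":
    jobstore: str = mkdtemp("tutorial_staging")
    tmp: str = mkdtemp("tutorial_staging_tmp")
    os.rmdir(jobstore)
    options = Job.Runner.getDefaultOptions(jobstore)
    options.logLevel = "INFO"

    with Toil(options) as toil:
        if not toil.options.restart:
            # Write a local input file and stage it into the Toil job store.
            # importFile may symlink rather than copy, depending on --linkImports.
            path = os.path.join(tmp, "in.txt")
            with open(path, "w") as f:
                f.write("Hello,\n")
            inputFileID = toil.importFile(f"file://{path}")
            outputFileID = toil.start(HelloWorld(inputFileID))
        else:
            outputFileID = toil.restart()

        # Copy the workflow's output back out of the job store.
        toil.exportFile(outputFileID, "file://" + os.path.join(tmp, "out.txt"))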
toil/test/docs/scriptsTest.py
CHANGED
@@ -36,8 +36,9 @@ class ToilDocumentationTest(ToilTest):
 
     def checkExitCode(self, script, extra_args: list[str] = []):
         program = os.path.join(self.directory, "scripts", script)
+        job_store = self._getTestJobStorePath()
         process = subprocess.Popen(
-            [python, program, "file:
+            [python, program, f"file:{job_store}", "--clean=always"] + extra_args,
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
         )
toil/test/jobStores/jobStoreTest.py
CHANGED
@@ -420,27 +420,31 @@ class AbstractJobStoreTest:
 
     def testReadWriteFileStreamTextMode(self):
         """Checks if text mode is compatible for file streams."""
-
+        jobstore1 = self.jobstore_initialized
+        jobstore2 = self.jobstore_resumed_noconfig
         job = self.arbitraryJob()
-
-
+        jobstore1.assign_job_id(job)
+        jobstore1.create_job(job)
 
         foo = "foo"
         bar = "bar"
 
-        with
+        with jobstore1.write_file_stream(job.jobStoreID, encoding="utf-8") as (
             f,
             fileID,
         ):
             f.write(foo)
 
-        with
+        with jobstore1.read_file_stream(fileID, encoding="utf-8") as f:
             self.assertEqual(foo, f.read())
 
-        with
+        with jobstore1.update_file_stream(fileID, encoding="utf-8") as f:
             f.write(bar)
 
-        with
+        with jobstore1.read_file_stream(fileID, encoding="utf-8") as f:
+            self.assertEqual(bar, f.read())
+
+        with jobstore2.read_file_stream(fileID, encoding="utf-8") as f:
             self.assertEqual(bar, f.read())
 
     def testPerJobFiles(self):
@@ -1173,6 +1177,9 @@ class AbstractEncryptedJobStoreTest:
         Create an encrypted file. Read it in encrypted mode then try with encryption off
         to ensure that it fails.
         """
+
+        from toil.lib.aws.s3 import AWSBadEncryptionKeyError
+
         phrase = b"This file is encrypted."
         fileName = "foo"
         with self.jobstore_initialized.write_shared_file_stream(
@@ -1186,13 +1193,14 @@ class AbstractEncryptedJobStoreTest:
         self.jobstore_initialized.config.sseKey = None
         try:
             with self.jobstore_initialized.read_shared_file_stream(fileName) as f:
-
-
-
-
-
-
-
+                # If the read goes through, we should fail the assert because
+                # we read the cyphertext
+                assert f.read() != phrase, (
+                    "Managed to read plaintext content with encryption off."
+                )
+        except AWSBadEncryptionKeyError as e:
+            # If the read doesn't go through, we get this.
+            assert "Your AWS encryption key is most likely configured incorrectly" in str(e)
 
 
 class FileJobStoreTest(AbstractJobStoreTest.Test):
@@ -1435,113 +1443,6 @@ class AWSJobStoreTest(AbstractJobStoreTest.Test):
         assert isinstance(self.jobstore_initialized, AWSJobStore)  # type hinting
         self.jobstore_initialized.destroy()
 
-    def testSDBDomainsDeletedOnFailedJobstoreBucketCreation(self):
-        """
-        This test ensures that SDB domains bound to a jobstore are deleted if the jobstore bucket
-        failed to be created. We simulate a failed jobstore bucket creation by using a bucket in a
-        different region with the same name.
-        """
-        from botocore.exceptions import ClientError
-
-        from toil.jobStores.aws.jobStore import BucketLocationConflictException
-        from toil.lib.aws.session import establish_boto3_session
-        from toil.lib.aws.utils import retry_s3
-
-        externalAWSLocation = "us-west-1"
-        for testRegion in "us-east-1", "us-west-2":
-            # We run this test twice, once with the default s3 server us-east-1 as the test region
-            # and once with another server (us-west-2). The external server is always us-west-1.
-            # This incidentally tests that the BucketLocationConflictException is thrown when using
-            # both the default, and a non-default server.
-            testJobStoreUUID = str(uuid.uuid4())
-            # Create the bucket at the external region
-            bucketName = "domain-test-" + testJobStoreUUID + "--files"
-            client = establish_boto3_session().client(
-                "s3", region_name=externalAWSLocation
-            )
-            resource = establish_boto3_session().resource(
-                "s3", region_name=externalAWSLocation
-            )
-
-            for attempt in retry_s3(delays=(2, 5, 10, 30, 60), timeout=600):
-                with attempt:
-                    # Create the bucket at the home region
-                    client.create_bucket(
-                        Bucket=bucketName,
-                        CreateBucketConfiguration={
-                            "LocationConstraint": externalAWSLocation
-                        },
-                    )
-
-            owner_tag = os.environ.get("TOIL_OWNER_TAG")
-            if owner_tag:
-                for attempt in retry_s3(delays=(1, 1, 2, 4, 8, 16), timeout=33):
-                    with attempt:
-                        bucket_tagging = resource.BucketTagging(bucketName)
-                        bucket_tagging.put(
-                            Tagging={"TagSet": [{"Key": "Owner", "Value": owner_tag}]}
-                        )
-
-            options = Job.Runner.getDefaultOptions(
-                "aws:" + testRegion + ":domain-test-" + testJobStoreUUID
-            )
-            options.logLevel = "DEBUG"
-            try:
-                with Toil(options) as toil:
-                    pass
-            except BucketLocationConflictException:
-                # Catch the expected BucketLocationConflictException and ensure that the bound
-                # domains don't exist in SDB.
-                sdb = establish_boto3_session().client(
-                    region_name=self.awsRegion(), service_name="sdb"
-                )
-                next_token = None
-                allDomainNames = []
-                while True:
-                    if next_token is None:
-                        domains = sdb.list_domains(MaxNumberOfDomains=100)
-                    else:
-                        domains = sdb.list_domains(
-                            MaxNumberOfDomains=100, NextToken=next_token
-                        )
-                    allDomainNames.extend(domains["DomainNames"])
-                    next_token = domains.get("NextToken")
-                    if next_token is None:
-                        break
-                self.assertFalse([d for d in allDomainNames if testJobStoreUUID in d])
-            else:
-                self.fail()
-            finally:
-                try:
-                    for attempt in retry_s3():
-                        with attempt:
-                            client.delete_bucket(Bucket=bucketName)
-                except ClientError as e:
-                    # The actual HTTP code of the error is in status.
-                    if (
-                        e.response.get("ResponseMetadata", {}).get("HTTPStatusCode")
-                        == 404
-                    ):
-                        # The bucket doesn't exist; maybe a failed delete actually succeeded.
-                        pass
-                    else:
-                        raise
-
-    @slow
-    def testInlinedFiles(self):
-        from toil.jobStores.aws.jobStore import AWSJobStore
-
-        jobstore = self.jobstore_initialized
-        for encrypted in (True, False):
-            n = AWSJobStore.FileInfo.maxInlinedSize()
-            sizes = (1, n // 2, n - 1, n, n + 1, 2 * n)
-            for size in chain(sizes, islice(reversed(sizes), 1)):
-                s = os.urandom(size)
-                with jobstore.write_shared_file_stream("foo") as f:
-                    f.write(s)
-                with jobstore.read_shared_file_stream("foo") as f:
-                    self.assertEqual(s, f.read())
-
     def testOverlargeJob(self):
         jobstore = self.jobstore_initialized
         jobRequirements = dict(memory=12, cores=34, disk=35, preemptible=True)
@@ -1661,19 +1562,8 @@ class AWSJobStoreTest(AbstractJobStoreTest.Test):
         )
         delete_s3_bucket(resource, bucket.name)
 
-    def _largeLogEntrySize(self):
-        from toil.jobStores.aws.jobStore import AWSJobStore
-
-        # So we get into the else branch of reader() in uploadStream(multiPart=False):
-        return AWSJobStore.FileInfo.maxBinarySize() * 2
-
-    def _batchDeletionSize(self):
-        from toil.jobStores.aws.jobStore import AWSJobStore
-
-        return AWSJobStore.itemsPerBatchDelete
-
 
-@needs_aws_s3
+# @needs_aws_s3
 class InvalidAWSJobStoreTest(ToilTest):
     def testInvalidJobStoreName(self):
         from toil.jobStores.aws.jobStore import AWSJobStore
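The reworked testReadWriteFileStreamTextMode above exercises the job store's text-mode stream API through two handles on the same store. A minimal sketch of that round trip follows; jobstore1, jobstore2, and job are assumed to be set up exactly as in AbstractJobStoreTest, so this is a fragment rather than a standalone script.

# Assumed to exist, as in the test above: two handles on one job store and a job.
jobstore1.assign_job_id(job)
jobstore1.create_job(job)

# Write a text-mode stream owned by the job; the context manager yields the
# file handle and the new file's ID.
with jobstore1.write_file_stream(job.jobStoreID, encoding="utf-8") as (f, fileID):
    f.write("foo")

# Overwrite the same file in place.
with jobstore1.update_file_stream(fileID, encoding="utf-8") as f:
    f.write("bar")

# The update is visible through the second handle on the same store.
with jobstore2.read_file_stream(fileID, encoding="utf-8") as f:
    assert f.read() == "bar"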
toil/test/lib/aws/test_iam.py
CHANGED
@@ -28,9 +28,9 @@ logging.basicConfig(level=logging.DEBUG)
 class IAMTest(ToilTest):
     """Check that given permissions and associated functions perform correctly"""
 
-    def test_permissions_iam(self):
+    def test_permissions_iam(self) -> None:
         granted_perms = {
-            "*": {"Action": ["ec2:*", "iam:*", "s3:*"
+            "*": {"Action": ["ec2:*", "iam:*", "s3:*"], "NotAction": []}
         }
         assert (
             iam.policy_permissions_allow(
@@ -46,8 +46,8 @@ class IAMTest(ToilTest):
             is True
         )
 
-    def test_negative_permissions_iam(self):
-        granted_perms = {"*": {"Action": ["ec2:*", "s3:*"
+    def test_negative_permissions_iam(self) -> None:
+        granted_perms = {"*": {"Action": ["ec2:*", "s3:*"], "NotAction": []}}
         assert (
             iam.policy_permissions_allow(
                 granted_perms, iam.CLUSTER_LAUNCHING_PERMISSIONS
@@ -62,7 +62,7 @@ class IAMTest(ToilTest):
             is False
         )
 
-    def test_wildcard_handling(self):
+    def test_wildcard_handling(self) -> None:
         assert iam.permission_matches_any("iam:CreateRole", ["iam:Create**"]) is True
         assert iam.permission_matches_any("iam:GetUser", ["iam:???????"]) is True
         assert iam.permission_matches_any("iam:ListRoleTags", ["iam:*?*Tags"]) is True
@@ -71,7 +71,7 @@ class IAMTest(ToilTest):
 
     @mock_aws
     @needs_aws_s3  # mock is incomplete, this avoid 'botocore.exceptions.NoCredentialsError: Unable to locate credentials'
-    def test_get_policy_permissions(self):
+    def test_get_policy_permissions(self) -> None:
         mock_iam = boto3.client("iam")
 
         # username that moto pretends we have from client.get_user()
@@ -167,7 +167,7 @@ class IAMTest(ToilTest):
         assert notactions_set == set()
 
     @needs_aws_s3
-    def test_create_delete_iam_role(self):
+    def test_create_delete_iam_role(self) -> None:
         region = "us-west-2"
         role_name = f'test{str(uuid4()).replace("-", "")}'
         with self.subTest("Create role w/policies."):
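The IAM test changes above are annotation and literal fixes, but they show how the permission helpers are called: a mapping of resources to granted Action/NotAction lists is checked against iam.CLUSTER_LAUNCHING_PERMISSIONS, with wildcards handled by iam.permission_matches_any. A small usage sketch; the import path for the iam module is an assumption, since the hunks above do not show the test file's imports.

# Import path assumed; the hunks above only show calls on an `iam` module.
from toil.lib.aws import iam

# Granted permissions reduced to the shape the helper expects.
granted_perms = {"*": {"Action": ["ec2:*", "iam:*", "s3:*"], "NotAction": []}}

# True when the granted actions cover what Toil needs to launch a cluster.
if iam.policy_permissions_allow(granted_perms, iam.CLUSTER_LAUNCHING_PERMISSIONS):
    print("These credentials look sufficient to launch a Toil cluster.")

# Wildcard handling used by the check, as exercised in test_wildcard_handling.
assert iam.permission_matches_any("iam:CreateRole", ["iam:Create**"]) is True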
toil/test/lib/aws/test_s3.py
CHANGED
@@ -18,67 +18,64 @@ from typing import TYPE_CHECKING, Optional
 
 from toil.jobStores.aws.jobStore import AWSJobStore
 from toil.lib.aws.session import establish_boto3_session
-from toil.lib.aws.utils import create_s3_bucket, get_bucket_region
+from toil.lib.aws.utils import create_s3_bucket, delete_s3_bucket, get_bucket_region
 from toil.test import ToilTest, needs_aws_s3
 
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.DEBUG)
 
+if TYPE_CHECKING:
+    from mypy_boto3_s3 import S3ServiceResource
+    from mypy_boto3_s3.service_resource import Bucket
 
 @needs_aws_s3
 class S3Test(ToilTest):
     """Confirm the workarounds for us-east-1."""
 
-    if TYPE_CHECKING:
-        from mypy_boto3_s3 import S3ServiceResource
-        from mypy_boto3_s3.service_resource import Bucket
-
     s3_resource: Optional["S3ServiceResource"]
-    bucket: Optional["Bucket"]
 
     @classmethod
     def setUpClass(cls) -> None:
         super().setUpClass()
         session = establish_boto3_session(region_name="us-east-1")
         cls.s3_resource = session.resource("s3", region_name="us-east-1")
-        cls.bucket = None
 
     def test_create_bucket(self) -> None:
         """Test bucket creation for us-east-1."""
         bucket_name = f"toil-s3test-{uuid.uuid4()}"
         assert self.s3_resource
-
-
-
-
-
-
-
-
-
+        bucket: Optional["Bucket"] = None
+        try:
+            bucket = create_s3_bucket(self.s3_resource, bucket_name, "us-east-1")
+            bucket.wait_until_exists()
+            owner_tag = os.environ.get("TOIL_OWNER_TAG")
+            if owner_tag:
+                bucket_tagging = self.s3_resource.BucketTagging(bucket_name)
+                bucket_tagging.put(
+                    Tagging={"TagSet": [{"Key": "Owner", "Value": owner_tag}]}
+                )
+            self.assertEqual(get_bucket_region(bucket_name), "us-east-1")
 
-
-
-
-
-
-
-
-
-
-
+            # Make sure all the bucket location getting strategies work on a bucket we created
+            self.assertEqual(
+                get_bucket_region(bucket_name, only_strategies={1}), "us-east-1"
+            )
+            self.assertEqual(
+                get_bucket_region(bucket_name, only_strategies={2}), "us-east-1"
+            )
+            self.assertEqual(
+                get_bucket_region(bucket_name, only_strategies={3}), "us-east-1"
+            )
+        finally:
+            # Clean up the bucket if we managed to make it
+            if bucket is not None:
+                delete_s3_bucket(self.s3_resource, bucket_name)
 
     def test_get_bucket_location_public_bucket(self) -> None:
         """
-        Test getting
+        Test getting bucket location for a bucket we don't own.
         """
 
         bucket_name = "spacenet-dataset"
         # This bucket happens to live in us-east-1
         self.assertEqual(get_bucket_region(bucket_name), "us-east-1")
-
-    @classmethod
-    def tearDownClass(cls) -> None:
-        if cls.bucket:
-            AWSJobStore._delete_bucket(cls.bucket)
-        super().tearDownClass()
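Condensed from the rewritten test_create_bucket above, the create/check/clean-up pattern with the toil.lib.aws helpers looks like this. It is a sketch that assumes working AWS credentials and simply mirrors the calls shown in the diff.

import uuid

from toil.lib.aws.session import establish_boto3_session
from toil.lib.aws.utils import create_s3_bucket, delete_s3_bucket, get_bucket_region

session = establish_boto3_session(region_name="us-east-1")
s3_resource = session.resource("s3", region_name="us-east-1")
bucket_name = f"toil-s3test-{uuid.uuid4()}"

bucket = None
try:
    # Create the bucket in us-east-1 and wait for it to exist.
    bucket = create_s3_bucket(s3_resource, bucket_name, "us-east-1")
    bucket.wait_until_exists()
    # Every region-detection strategy should agree on where it lives.
    for strategy in (1, 2, 3):
        assert get_bucket_region(bucket_name, only_strategies={strategy}) == "us-east-1"
finally:
    # Clean up the bucket if we managed to make it.
    if bucket is not None:
        delete_s3_bucket(s3_resource, bucket_name)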
toil/test/lib/aws/test_utils.py
CHANGED
@@ -27,35 +27,35 @@ class TagGenerationTest(ToilTest):
     Test for tag generation from environment variables
     """
 
-    def test_build_tag(self):
+    def test_build_tag(self) -> None:
         environment = dict()
         environment["TOIL_OWNER_TAG"] = "😀"
-        environment["TOIL_AWS_TAGS"] =
+        environment["TOIL_AWS_TAGS"] = ""
         tag_dict = build_tag_dict_from_env(environment)
         assert tag_dict == {"Owner": "😀"}
 
-    def test_empty_aws_tags(self):
+    def test_empty_aws_tags(self) -> None:
         environment = dict()
-        environment["TOIL_OWNER_TAG"] =
+        environment["TOIL_OWNER_TAG"] = ""
         environment["TOIL_AWS_TAGS"] = "{}"
         tag_dict = build_tag_dict_from_env(environment)
         assert tag_dict == dict()
 
-    def test_incorrect_json_object(self):
+    def test_incorrect_json_object(self) -> None:
         with pytest.raises(SystemExit):
             environment = dict()
-            environment["TOIL_OWNER_TAG"] =
+            environment["TOIL_OWNER_TAG"] = ""
             environment["TOIL_AWS_TAGS"] = "231"
             tag_dict = build_tag_dict_from_env(environment)
 
-    def test_incorrect_json_emoji(self):
+    def test_incorrect_json_emoji(self) -> None:
         with pytest.raises(SystemExit):
             environment = dict()
-            environment["TOIL_OWNER_TAG"] =
+            environment["TOIL_OWNER_TAG"] = ""
             environment["TOIL_AWS_TAGS"] = "😀"
             tag_dict = build_tag_dict_from_env(environment)
 
-    def test_build_tag_with_tags(self):
+    def test_build_tag_with_tags(self) -> None:
         environment = dict()
         environment["TOIL_OWNER_TAG"] = "😀"
         environment["TOIL_AWS_TAGS"] = '{"1": "2", " ":")"}'
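For context on the tag tests above: build_tag_dict_from_env reads TOIL_OWNER_TAG and TOIL_AWS_TAGS from an environment mapping and turns them into an AWS tag dictionary, exiting if TOIL_AWS_TAGS is not a JSON object. A small sketch of that behaviour as the tests describe it; the import path is an assumption, since the hunks do not show the test module's imports.

# Import path assumed; the hunks above only show the call itself.
from toil.lib.aws.utils import build_tag_dict_from_env

# Per test_build_tag: TOIL_OWNER_TAG becomes an "Owner" tag and an empty
# TOIL_AWS_TAGS contributes nothing.
env = {"TOIL_OWNER_TAG": "me", "TOIL_AWS_TAGS": ""}
assert build_tag_dict_from_env(env) == {"Owner": "me"}

# Per test_incorrect_json_object: a value that is not a JSON object exits.
bad_env = {"TOIL_OWNER_TAG": "", "TOIL_AWS_TAGS": "231"}
try:
    build_tag_dict_from_env(bad_env)
except SystemExit:
    print("TOIL_AWS_TAGS must be a JSON object of tags")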
toil/test/lib/test_url.py
ADDED
@@ -0,0 +1,69 @@
+# Copyright (C) 2015-2022 Regents of the University of California
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import getpass
+import logging
+
+from pytest_httpserver import HTTPServer
+
+from toil.lib.misc import get_user_name
+from toil.lib.url import URLAccess
+from toil.test import needs_aws_s3, needs_online
+
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.DEBUG)
+
+class TestURLAccess():
+    """
+    Test URLAccess class handling read, list,
+    and checking the size/existence of resources at given URL
+    """
+
+    def test_get_url_access(self, httpserver: HTTPServer) -> None:
+        httpserver.expect_request("/some_url").respond_with_data("Yep that's a URL")
+        file_url = httpserver.url_for("/some_url")
+        assert URLAccess.url_exists(file_url)
+
+    @needs_aws_s3
+    def test_get_size(self) -> None:
+        size = URLAccess.get_size("s3://toil-datasets/hello.txt")
+        assert isinstance(size, int)
+        assert size > 0
+
+    @needs_aws_s3
+    def test_get_is_directory(self) -> None:
+        assert not URLAccess.get_is_directory("s3://toil-datasets/hello.txt")
+
+    @needs_aws_s3
+    def test_list_url(self) -> None:
+        test_dir = URLAccess.list_url("s3://1000genomes/")
+        assert isinstance(test_dir, list)
+        assert len(test_dir) > 0
+
+    @needs_aws_s3
+    def test_read_from_url(self) -> None:
+        import io
+        output = io.BytesIO()
+        size, executable = URLAccess.read_from_url("s3://toil-datasets/hello.txt", output)
+        assert isinstance(size, int)
+        assert size > 0
+        assert not executable
+        assert len(output.getvalue()) > 0
+
+    @needs_aws_s3
+    def test_open_url(self) -> None:
+        with URLAccess.open_url("s3://toil-datasets/hello.txt") as readable:
+            content = readable.read()
+            assert isinstance(content, bytes)
+            assert len(content) > 0
toil/test/lib/url_plugin_test.py
ADDED
@@ -0,0 +1,105 @@
+# Copyright (C) 2015-2025 Regents of the University of California
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+from typing import IO, Optional, Union
+
+from configargparse import ArgParser, ArgumentParser
+
+from toil.batchSystems.abstractBatchSystem import (
+    AbstractBatchSystem,
+    UpdatedBatchJobInfo,
+)
+from toil.batchSystems.cleanup_support import BatchSystemCleanupSupport
+from toil.batchSystems.options import OptionSetter
+from toil.batchSystems.registry import add_batch_system_factory
+from toil.common import Toil, addOptions
+from toil.job import JobDescription
+
+import io
+from urllib.parse import ParseResult
+from toil.test import ToilTest
+from toil.lib.url import URLAccess
+from toil.lib.plugins import register_plugin, remove_plugin
+
+logger = logging.getLogger(__name__)
+
+class FakeURLPlugin(URLAccess):
+    @classmethod
+    def _supports_url(cls, url: ParseResult, export: bool = False) -> bool:
+        return url.scheme == "fake"
+
+    @classmethod
+    def _url_exists(cls, url: ParseResult) -> bool:
+        return url.netloc == "exists"
+
+    @classmethod
+    def _get_size(cls, url: ParseResult) -> int:
+        return 1234
+
+    @classmethod
+    def _get_is_directory(cls, url: ParseResult) -> bool:
+        return url.path.endswith("/")
+
+    @classmethod
+    def _list_url(cls, url: ParseResult) -> list[str]:
+        return ["file1.txt", "subdir/"]
+
+    @classmethod
+    def _read_from_url(cls, url: ParseResult, writable: IO[bytes]) -> tuple[int, bool]:
+        content = b"hello world"
+        writable.write(content)
+        return len(content), False
+
+    @classmethod
+    def _open_url(cls, url: ParseResult) -> IO[bytes]:
+        return io.BytesIO(b"hello world")
+
+    @classmethod
+    def _write_to_url(cls, readable: Union[IO[bytes], IO[str]], url: ParseResult, executable: bool = False) -> None:
+        pass
+
+
+class TestURLAccess(ToilTest):
+    def setUp(self) -> None:
+        super().setUp()
+        register_plugin("url_access", "fake", lambda: FakeURLPlugin)
+
+    def tearDown(self) -> None:
+        remove_plugin("url_access", "fake")
+        super().tearDown()
+
+    def test_url_exists(self) -> None:
+        assert URLAccess.url_exists("fake://exists/resource") == True
+        assert URLAccess.url_exists("fake://missing/resource") == False
+
+    def test_get_size(self) -> None:
+        assert URLAccess.get_size("fake://any/resource") == 1234
+
+    def test_get_is_directory(self) -> None:
+        assert URLAccess.get_is_directory("fake://any/folder/") == True
+        assert URLAccess.get_is_directory("fake://any/file.txt") == False
+
+    def test_list_url(self) -> None:
+        assert URLAccess.list_url("fake://any/folder/") == ["file1.txt", "subdir/"]
+
+    def test_read_from_url(self) -> None:
+        output = io.BytesIO()
+        size, _ = URLAccess.read_from_url("fake://any/resource", output)
+        assert output.getvalue() == b"hello world"
+        assert size == len("hello world")
+
+    def test_open_url(self) -> None:
+        with URLAccess.open_url("fake://any/resource") as stream:
+            content = stream.read()
+            assert content == b"hello world"
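Taken together, the new url_plugin_test shows the plugin hook added in toil.lib.plugins: a URLAccess subclass is registered under the "url_access" plugin kind for its scheme, and the generic URLAccess front end then dispatches to it. A minimal usage sketch, reusing the FakeURLPlugin class defined in the test above (so it is a fragment, not a standalone script):

from toil.lib.plugins import register_plugin, remove_plugin
from toil.lib.url import URLAccess

# Register the plugin for the "fake" scheme, query it through the generic
# URLAccess front end, then unregister it, mirroring setUp/tearDown above.
register_plugin("url_access", "fake", lambda: FakeURLPlugin)
try:
    assert URLAccess.url_exists("fake://exists/resource")
    assert URLAccess.get_size("fake://exists/resource") == 1234
    assert URLAccess.list_url("fake://exists/folder/") == ["file1.txt", "subdir/"]
finally:
    remove_plugin("url_access", "fake")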