toil 8.2.0__py3-none-any.whl → 9.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. toil/batchSystems/registry.py +15 -118
  2. toil/common.py +20 -1
  3. toil/cwl/cwltoil.py +80 -37
  4. toil/cwl/utils.py +103 -3
  5. toil/jobStores/abstractJobStore.py +11 -236
  6. toil/jobStores/aws/jobStore.py +2 -1
  7. toil/jobStores/fileJobStore.py +2 -1
  8. toil/jobStores/googleJobStore.py +7 -4
  9. toil/lib/accelerators.py +1 -1
  10. toil/lib/generatedEC2Lists.py +81 -19
  11. toil/lib/misc.py +1 -1
  12. toil/lib/plugins.py +106 -0
  13. toil/lib/url.py +320 -0
  14. toil/options/cwl.py +13 -1
  15. toil/options/runner.py +17 -10
  16. toil/options/wdl.py +12 -1
  17. toil/provisioners/aws/awsProvisioner.py +25 -2
  18. toil/server/app.py +12 -6
  19. toil/server/cli/wes_cwl_runner.py +2 -2
  20. toil/server/wes/abstract_backend.py +21 -43
  21. toil/server/wes/toil_backend.py +2 -2
  22. toil/test/__init__.py +2 -2
  23. toil/test/batchSystems/batchSystemTest.py +2 -9
  24. toil/test/batchSystems/batch_system_plugin_test.py +7 -0
  25. toil/test/cwl/cwlTest.py +181 -8
  26. toil/test/docs/scriptsTest.py +2 -1
  27. toil/test/lib/test_url.py +69 -0
  28. toil/test/lib/url_plugin_test.py +105 -0
  29. toil/test/provisioners/aws/awsProvisionerTest.py +1 -1
  30. toil/test/provisioners/clusterTest.py +15 -2
  31. toil/test/provisioners/gceProvisionerTest.py +1 -1
  32. toil/test/server/serverTest.py +78 -36
  33. toil/test/wdl/md5sum/md5sum-gs.json +1 -1
  34. toil/test/wdl/testfiles/read_file.wdl +18 -0
  35. toil/test/wdl/testfiles/url_to_optional_file.wdl +2 -1
  36. toil/test/wdl/wdltoil_test.py +74 -125
  37. toil/utils/toilSshCluster.py +23 -0
  38. toil/utils/toilUpdateEC2Instances.py +1 -0
  39. toil/version.py +9 -9
  40. toil/wdl/wdltoil.py +182 -314
  41. toil/worker.py +11 -6
  42. {toil-8.2.0.dist-info → toil-9.0.0.dist-info}/METADATA +23 -23
  43. {toil-8.2.0.dist-info → toil-9.0.0.dist-info}/RECORD +47 -42
  44. {toil-8.2.0.dist-info → toil-9.0.0.dist-info}/WHEEL +1 -1
  45. {toil-8.2.0.dist-info → toil-9.0.0.dist-info}/entry_points.txt +0 -0
  46. {toil-8.2.0.dist-info → toil-9.0.0.dist-info}/licenses/LICENSE +0 -0
  47. {toil-8.2.0.dist-info → toil-9.0.0.dist-info}/top_level.txt +0 -0
@@ -224,7 +224,7 @@ class WESBackend:
224
224
  )
225
225
 
226
226
  def collect_attachments(
227
- self, run_id: Optional[str], temp_dir: Optional[str]
227
+ self, args: dict[str, Any], run_id: Optional[str], temp_dir: Optional[str]
228
228
  ) -> tuple[str, dict[str, Any]]:
229
229
  """
230
230
  Collect attachments from the current request by staging uploaded files
@@ -238,48 +238,26 @@ class WESBackend:
238
238
  temp_dir = mkdtemp()
239
239
  body: dict[str, Any] = {}
240
240
  has_attachments = False
241
- for key, ls in connexion.request.files.lists():
242
- try:
243
- for value in ls:
244
- # uploaded files that are required to execute the workflow
245
- if key == "workflow_attachment":
246
- # guard against maliciously constructed filenames
247
- dest = os.path.join(temp_dir, self.secure_path(value.filename))
248
- if not os.path.isdir(os.path.dirname(dest)):
249
- os.makedirs(os.path.dirname(dest))
250
- self.log_for_run(
251
- run_id, f"Staging attachment '{value.filename}' to '{dest}'"
252
- )
253
- value.save(dest)
254
- has_attachments = True
255
- body[key] = (
256
- f"file://{temp_dir}" # Reference to temp working dir.
257
- )
258
-
259
- elif key in (
260
- "workflow_params",
261
- "tags",
262
- "workflow_engine_parameters",
263
- ):
264
- content = value.read()
265
- body[key] = json.loads(content.decode("utf-8"))
266
- else:
267
- body[key] = value.read().decode()
268
- except Exception as e:
269
- raise MalformedRequestException(f"Error reading parameter '{key}': {e}")
270
-
271
- for key, ls in connexion.request.form.lists():
272
- try:
273
- for value in ls:
274
- if not value:
275
- continue
276
- if key in ("workflow_params", "tags", "workflow_engine_parameters"):
277
- body[key] = json.loads(value)
278
- else:
279
- body[key] = value
280
- except Exception as e:
281
- raise MalformedRequestException(f"Error reading parameter '{key}': {e}")
282
-
241
+ for k, v in args.items():
242
+ if k == "workflow_attachment":
243
+ for file in (v or []):
244
+ dest = os.path.join(temp_dir, self.secure_path(file.filename))
245
+ if not os.path.isdir(os.path.dirname(dest)):
246
+ os.makedirs(os.path.dirname(dest))
247
+ self.log_for_run(
248
+ run_id,
249
+ f"Staging attachment '{file.filename}' to '{dest}'",
250
+ )
251
+ file.save(dest)
252
+ has_attachments = True
253
+ body["workflow_attachment"] = (
254
+ "file://%s" % temp_dir
255
+ ) # Reference to temp working dir.
256
+ elif k in ("workflow_params", "tags", "workflow_engine_parameters"):
257
+ if v is not None:
258
+ body[k] = json.loads(v)
259
+ else:
260
+ body[k] = v
283
261
  if "workflow_url" in body:
284
262
  url, ref = urldefrag(body["workflow_url"])
285
263
  if ":" not in url:
@@ -502,7 +502,7 @@ class ToilBackend(WESBackend):
502
502
  }
503
503
 
504
504
  @handle_errors
505
- def run_workflow(self) -> dict[str, str]:
505
+ def run_workflow(self, **args: Any) -> dict[str, str]:
506
506
  """Run a workflow."""
507
507
  run_id = self.run_id_prefix + uuid.uuid4().hex
508
508
  run = self._get_run(run_id, should_exists=False)
@@ -514,7 +514,7 @@ class ToilBackend(WESBackend):
514
514
  # stage the uploaded files to the execution directory, so that we can run the workflow file directly
515
515
  temp_dir = run.exec_dir
516
516
  try:
517
- _, request = self.collect_attachments(run_id, temp_dir=temp_dir)
517
+ _, request = self.collect_attachments(args, run_id, temp_dir=temp_dir)
518
518
  except ValueError:
519
519
  run.clean_up()
520
520
  raise
toil/test/__init__.py CHANGED
@@ -467,7 +467,7 @@ def needs_google_storage(test_item: MT) -> MT:
467
467
  """
468
468
  test_item = _mark_test("google_storage", needs_online(test_item))
469
469
  try:
470
- from google.cloud import storage # type: ignore[import-untyped]
470
+ import google.cloud.storage # type: ignore[import-untyped]
471
471
  except ImportError:
472
472
  return unittest.skip(
473
473
  "Install Toil with the 'google' extra to include this test."
@@ -619,7 +619,7 @@ def needs_htcondor(test_item: MT) -> MT:
619
619
  """Use a decorator before test classes or methods to run only if the HTCondor is installed."""
620
620
  test_item = _mark_test("htcondor", test_item)
621
621
  try:
622
- import htcondor # type: ignore[import-not-found]
622
+ import htcondor # type: ignore
623
623
 
624
624
  htcondor.Collector(os.getenv("TOIL_HTCONDOR_COLLECTOR")).query(
625
625
  constraint="False"
@@ -42,8 +42,6 @@ from toil.batchSystems.registry import (
42
42
  add_batch_system_factory,
43
43
  get_batch_system,
44
44
  get_batch_systems,
45
- restore_batch_system_plugin_state,
46
- save_batch_system_plugin_state,
47
45
  )
48
46
  from toil.batchSystems.singleMachine import SingleMachineBatchSystem
49
47
  from toil.common import Config, Toil
@@ -69,6 +67,7 @@ from toil.test import (
69
67
  pslow,
70
68
  pneeds_mesos,
71
69
  )
70
+ from toil.lib.plugins import remove_plugin
72
71
 
73
72
  import pytest
74
73
 
@@ -97,15 +96,9 @@ class BatchSystemPluginTest(ToilTest):
97
96
  Class for testing batch system plugin functionality.
98
97
  """
99
98
 
100
- def setUp(self) -> None:
101
- # Save plugin state so our plugin doesn't stick around after the test
102
- # (and create duplicate options)
103
- self.__state = save_batch_system_plugin_state()
104
- super().setUp()
105
-
106
99
  def tearDown(self) -> None:
107
100
  # Restore plugin state
108
- restore_batch_system_plugin_state(self.__state)
101
+ remove_plugin("batch_system", "testBatchSystem")
109
102
  super().tearDown()
110
103
 
111
104
  def test_add_batch_system_factory(self) -> None:
@@ -26,6 +26,7 @@ from toil.batchSystems.registry import add_batch_system_factory
26
26
  from toil.common import Toil, addOptions
27
27
  from toil.job import JobDescription
28
28
  from toil.test import ToilTest
29
+ from toil.lib.plugins import remove_plugin
29
30
 
30
31
  logger = logging.getLogger(__name__)
31
32
 
@@ -68,6 +69,11 @@ class FakeBatchSystem(BatchSystemCleanupSupport):
68
69
 
69
70
 
70
71
  class BatchSystemPluginTest(ToilTest):
72
+ def tearDown(self) -> None:
73
+ # Restore plugin state
74
+ remove_plugin("batch_system", "fake")
75
+ super().tearDown()
76
+
71
77
  def test_batchsystem_plugin_installable(self):
72
78
  """
73
79
  Test that installing a batch system plugin works.
@@ -76,6 +82,7 @@ class BatchSystemPluginTest(ToilTest):
76
82
 
77
83
  def fake_batch_system_factory() -> type[AbstractBatchSystem]:
78
84
  return FakeBatchSystem
85
+
79
86
 
80
87
  add_batch_system_factory("fake", fake_batch_system_factory)
81
88
 
toil/test/cwl/cwlTest.py CHANGED
@@ -45,9 +45,11 @@ from toil.cwl.utils import (
45
45
  download_structure,
46
46
  visit_cwl_class_and_reduce,
47
47
  visit_top_cwl_class,
48
+ remove_redundant_mounts
48
49
  )
49
50
  from toil.fileStores import FileID
50
51
  from toil.fileStores.abstractFileStore import AbstractFileStore
52
+ from toil.job import WorkerImportJob
51
53
  from toil.lib.threading import cpu_count
52
54
  from toil.test import (
53
55
  get_data,
@@ -1112,12 +1114,13 @@ def cwl_v1_0_spec(tmp_path: Path) -> Generator[Path]:
1112
1114
  finally:
1113
1115
  pass # no cleanup
1114
1116
 
1115
-
1117
+ @pytest.mark.integrative
1118
+ @pytest.mark.conformance
1116
1119
  @needs_cwl
1117
1120
  @needs_online
1118
1121
  @pytest.mark.cwl
1119
1122
  @pytest.mark.online
1120
- class TestCWLv10:
1123
+ class TestCWLv10Conformance:
1121
1124
  """
1122
1125
  Run the CWL 1.0 conformance tests in various environments.
1123
1126
  """
@@ -1295,11 +1298,13 @@ def cwl_v1_1_spec(tmp_path: Path) -> Generator[Path]:
1295
1298
  pass # no cleanup
1296
1299
 
1297
1300
 
1301
+ @pytest.mark.integrative
1302
+ @pytest.mark.conformance
1298
1303
  @needs_cwl
1299
1304
  @needs_online
1300
1305
  @pytest.mark.cwl
1301
1306
  @pytest.mark.online
1302
- class TestCWLv11:
1307
+ class TestCWLv11Conformance:
1303
1308
  """
1304
1309
  Run the CWL 1.1 conformance tests in various environments.
1305
1310
  """
@@ -1383,11 +1388,13 @@ def cwl_v1_2_spec(tmp_path: Path) -> Generator[Path]:
1383
1388
  pass # no cleanup
1384
1389
 
1385
1390
 
1391
+ @pytest.mark.integrative
1392
+ @pytest.mark.conformance
1386
1393
  @needs_cwl
1387
1394
  @needs_online
1388
1395
  @pytest.mark.cwl
1389
1396
  @pytest.mark.online
1390
- class TestCWLv12:
1397
+ class TestCWLv12Conformance:
1391
1398
  """
1392
1399
  Run the CWL 1.2 conformance tests in various environments.
1393
1400
  """
@@ -1525,7 +1532,7 @@ class TestCWLv12:
1525
1532
  TOIL_WES_ENDPOINT=http://localhost:8080 \
1526
1533
  TOIL_WES_USER=test \
1527
1534
  TOIL_WES_PASSWORD=password \
1528
- python -m pytest src/toil/test/cwl/cwlTest.py::TestCWLv12::test_wes_server_cwl_conformance -vv --log-level INFO --log-cli-level INFO
1535
+ python -m pytest src/toil/test/cwl/cwlTest.py::TestCWLv12Conformance::test_wes_server_cwl_conformance -vv --log-level INFO --log-cli-level INFO
1529
1536
  """
1530
1537
  endpoint = os.environ.get("TOIL_WES_ENDPOINT")
1531
1538
  extra_args = [f"--wes_endpoint={endpoint}"]
@@ -1906,6 +1913,134 @@ def test_visit_cwl_class_and_reduce() -> None:
1906
1913
  assert up_child_count == 2
1907
1914
 
1908
1915
 
1916
+ @needs_cwl
1917
+ @pytest.mark.cwl
1918
+ @pytest.mark.cwl_small
1919
+ def test_trim_mounts_op_nonredundant() -> None:
1920
+ """
1921
+ Make sure we don't remove all non-duplicate listings
1922
+ """
1923
+ s: CWLObjectType = {"class": "Directory", "basename": "directory", "listing": [{"class": "File", "basename": "file", "contents": "hello world"}]}
1924
+ remove_redundant_mounts(s)
1925
+
1926
+ # nothing should have been removed
1927
+ assert isinstance(s['listing'], list)
1928
+ assert len(s['listing']) == 1
1929
+
1930
+ @needs_cwl
1931
+ @pytest.mark.cwl
1932
+ @pytest.mark.cwl_small
1933
+ def test_trim_mounts_op_redundant() -> None:
1934
+ """
1935
+ Make sure we remove all duplicate listings
1936
+ """
1937
+ s: CWLObjectType = {
1938
+ "class": "Directory",
1939
+ "location": "file:///home/heaucques/Documents/toil/test_dir",
1940
+ "basename": "test_dir",
1941
+ "listing": [
1942
+ {
1943
+ "class": "Directory",
1944
+ "location": "file:///home/heaucques/Documents/toil/test_dir/nested_dir",
1945
+ "basename": "nested_dir",
1946
+ "listing": [],
1947
+ "path": "/home/heaucques/Documents/toil/test_dir/nested_dir"
1948
+ },
1949
+ {
1950
+ "class": "File",
1951
+ "location": "file:///home/heaucques/Documents/toil/test_dir/test_file",
1952
+ "basename": "test_file",
1953
+ "size": 0,
1954
+ "nameroot": "test_file",
1955
+ "nameext": "",
1956
+ "path": "/home/heaucques/Documents/toil/test_dir/test_file",
1957
+ "checksum": "sha1$da39a3ee5e6b4b0d3255bfef95601890afd80709"
1958
+ }
1959
+ ],
1960
+ "path": "/home/heaucques/Documents/toil/test_dir"
1961
+ }
1962
+ remove_redundant_mounts(s)
1963
+
1964
+ # everything should have been removed
1965
+ assert isinstance(s['listing'], list)
1966
+ assert len(s['listing']) == 0
1967
+
1968
+ @needs_cwl
1969
+ @pytest.mark.cwl
1970
+ @pytest.mark.cwl_small
1971
+ def test_trim_mounts_op_partially_redundant() -> None:
1972
+ """
1973
+ Make sure we remove only the redundant listings in the CWL object and leave nonredundant listings intact
1974
+ """
1975
+ s: CWLObjectType = {
1976
+ "class": "Directory",
1977
+ "location": "file:///home/heaucques/Documents/toil/test_dir",
1978
+ "basename": "test_dir",
1979
+ "listing": [
1980
+ {
1981
+ "class": "Directory",
1982
+ "location": "file:///home/heaucques/Documents/thing",
1983
+ "basename": "thing2",
1984
+ "listing": [],
1985
+ "path": "/home/heaucques/Documents/toil/thing2"
1986
+ },
1987
+ {
1988
+ "class": "File",
1989
+ "location": "file:///home/heaucques/Documents/toil/test_dir/test_file",
1990
+ "basename": "test_file",
1991
+ "size": 0,
1992
+ "nameroot": "test_file",
1993
+ "nameext": "",
1994
+ "path": "/home/heaucques/Documents/toil/test_dir/test_file",
1995
+ "checksum": "sha1$da39a3ee5e6b4b0d3255bfef95601890afd80709"
1996
+ }
1997
+ ],
1998
+ "path": "/home/heaucques/Documents/toil/test_dir"
1999
+ }
2000
+ remove_redundant_mounts(s)
2001
+
2002
+ # everything except the nested directory should be removed
2003
+ assert isinstance(s['listing'], list)
2004
+ assert len(s['listing']) == 1
2005
+
2006
+ @needs_cwl
2007
+ @pytest.mark.cwl
2008
+ @pytest.mark.cwl_small
2009
+ def test_trim_mounts_op_mixed_urls_and_paths() -> None:
2010
+ """
2011
+ Ensure we remove redundant listings in certain edge cases
2012
+ """
2013
+ # Edge cases around encoding:
2014
+ # Ensure URL decoded file URIs match the bare path equivalent. Both of these paths should have the same shared directory
2015
+ s: CWLObjectType = {"class": "Directory", "basename": "123", "location": "file:///tmp/%25/123", "listing": [{"class": "File", "path": "/tmp/%/123/456", "basename": "456"}]}
2016
+ remove_redundant_mounts(s)
2017
+ assert isinstance(s['listing'], list)
2018
+ assert len(s['listing']) == 0
2019
+
2020
+ @needs_cwl
2021
+ @pytest.mark.cwl
2022
+ @pytest.mark.cwl_small
2023
+ def test_trim_mounts_op_decodable_paths() -> None:
2024
+ """"""
2025
+ # Ensure path names don't get unnecessarily decoded
2026
+ s: CWLObjectType = {"class": "Directory", "basename": "dir", "path": "/tmp/cat%2Ftag/dir", "listing": [{"class": "File", "path": "/tmp/cat/tag/dir/file", "basename": "file"}]}
2027
+ remove_redundant_mounts(s)
2028
+ assert isinstance(s['listing'], list)
2029
+ assert len(s['listing']) == 1
2030
+
2031
+ @needs_cwl
2032
+ @pytest.mark.cwl
2033
+ @pytest.mark.cwl_small
2034
+ def test_trim_mounts_op_multiple_encodings() -> None:
2035
+ # Ensure differently encoded URLs are properly decoded
2036
+ s: CWLObjectType = {"class": "Directory", "basename": "dir", "location": "file:///tmp/cat%2Ftag/dir", "listing": [{"class": "File", "location": "file:///tmp/cat%2ftag/dir/file", "basename": "file"}]}
2037
+ remove_redundant_mounts(s)
2038
+ assert isinstance(s['listing'], list)
2039
+ assert len(s['listing']) == 0
2040
+
2041
+
2042
+
2043
+
1909
2044
  @needs_cwl
1910
2045
  @pytest.mark.cwl
1911
2046
  @pytest.mark.cwl_small
@@ -2010,12 +2145,16 @@ def test_import_on_workers() -> None:
2010
2145
 
2011
2146
  with get_data("test/cwl/download.cwl") as cwl_file:
2012
2147
  with get_data("test/cwl/directory/directory/file.txt") as file_path:
2148
+ # To make sure we see every job issued with a leader log message
2149
+ # that we can then detect for the test, we need to turn off
2150
+ # chaining.
2013
2151
  args = [
2014
2152
  "--runImportsOnWorkers",
2015
2153
  "--importWorkersDisk=10MiB",
2016
2154
  "--realTimeLogging=True",
2017
2155
  "--logLevel=INFO",
2018
2156
  "--logColors=False",
2157
+ "--disableChaining=True",
2019
2158
  str(cwl_file),
2020
2159
  "--input",
2021
2160
  str(file_path),
@@ -2024,6 +2163,29 @@ def test_import_on_workers() -> None:
2024
2163
 
2025
2164
  assert detector.detected is True
2026
2165
 
2166
+ @needs_cwl
2167
+ @pytest.mark.cwl
2168
+ @pytest.mark.cwl_small
2169
+ def test_missing_tmpdir_and_tmp_outdir(tmp_path: Path) -> None:
2170
+ """
2171
+ tmpdir_prefix and tmp_outdir_prefix do not need to exist prior to running the workflow
2172
+ """
2173
+ tmpdir_prefix = os.path.join(tmp_path, "tmpdir/blah")
2174
+ tmp_outdir_prefix = os.path.join(tmp_path, "tmp_outdir/blah")
2175
+
2176
+ assert not os.path.exists(os.path.dirname(tmpdir_prefix))
2177
+ assert not os.path.exists(os.path.dirname(tmp_outdir_prefix))
2178
+ with get_data("test/cwl/echo_string.cwl") as cwl_file:
2179
+ cmd = [
2180
+ "toil-cwl-runner",
2181
+ f"--jobStore=file:{tmp_path / 'jobstore'}",
2182
+ "--strict-memory-limit",
2183
+ f'--tmpdir-prefix={tmpdir_prefix}',
2184
+ f'--tmp-outdir-prefix={tmp_outdir_prefix}',
2185
+ str(cwl_file),
2186
+ ]
2187
+ p = subprocess.run(cmd)
2188
+ assert p.returncode == 0
2027
2189
 
2028
2190
  # StreamHandler is generic, _typeshed doesn't exist at runtime, do a bit of typing trickery, see https://github.com/python/typeshed/issues/5680
2029
2191
  if TYPE_CHECKING:
@@ -2036,7 +2198,7 @@ else:
2036
2198
 
2037
2199
  class ImportWorkersMessageHandler(_stream_handler):
2038
2200
  """
2039
- Detect the import workers log message and set a flag.
2201
+ Detect whether any WorkerImportJob jobs ran during a workflow.
2040
2202
  """
2041
2203
 
2042
2204
  def __init__(self) -> None:
@@ -2045,7 +2207,18 @@ class ImportWorkersMessageHandler(_stream_handler):
2045
2207
  super().__init__(sys.stderr)
2046
2208
 
2047
2209
  def emit(self, record: logging.LogRecord) -> None:
2048
- if (record.msg % record.args).startswith(
2049
- "Issued job 'CWLImportJob' CWLImportJob"
2210
+ # We get the job name from the class since we already started failing
2211
+ # this test once due to it being renamed.
2212
+ try:
2213
+ formatted = record.getMessage()
2214
+ except TypeError as e:
2215
+ # The log message has the wrong number of items for its fields.
2216
+ # Complain in a way we could figure out.
2217
+ raise RuntimeError(
2218
+ f"Log message {record.msg} has wrong number of "
2219
+ f"fields in {record.args}"
2220
+ ) from e
2221
+ if formatted.startswith(
2222
+ f"Issued job '{WorkerImportJob.__name__}'"
2050
2223
  ):
2051
2224
  self.detected = True
@@ -36,8 +36,9 @@ class ToilDocumentationTest(ToilTest):
36
36
 
37
37
  def checkExitCode(self, script, extra_args: list[str] = []):
38
38
  program = os.path.join(self.directory, "scripts", script)
39
+ job_store = self._getTestJobStorePath()
39
40
  process = subprocess.Popen(
40
- [python, program, "file:my-jobstore", "--clean=always"] + extra_args,
41
+ [python, program, f"file:{job_store}", "--clean=always"] + extra_args,
41
42
  stdout=subprocess.PIPE,
42
43
  stderr=subprocess.PIPE,
43
44
  )
@@ -0,0 +1,69 @@
1
+ # Copyright (C) 2015-2022 Regents of the University of California
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import getpass
15
+ import logging
16
+
17
+ from pytest_httpserver import HTTPServer
18
+
19
+ from toil.lib.misc import get_user_name
20
+ from toil.lib.url import URLAccess
21
+ from toil.test import needs_aws_s3, needs_online
22
+
23
+
24
+ logger = logging.getLogger(__name__)
25
+ logging.basicConfig(level=logging.DEBUG)
26
+
27
+ class TestURLAccess():
28
+ """
29
+ Test URLAccess class handling read, list,
30
+ and checking the size/existence of resources at given URL
31
+ """
32
+
33
+ def test_get_url_access(self, httpserver: HTTPServer) -> None:
34
+ httpserver.expect_request("/some_url").respond_with_data("Yep that's a URL")
35
+ file_url = httpserver.url_for("/some_url")
36
+ assert URLAccess.url_exists(file_url)
37
+
38
+ @needs_aws_s3
39
+ def test_get_size(self) -> None:
40
+ size = URLAccess.get_size("s3://toil-datasets/hello.txt")
41
+ assert isinstance(size, int)
42
+ assert size > 0
43
+
44
+ @needs_aws_s3
45
+ def test_get_is_directory(self) -> None:
46
+ assert not URLAccess.get_is_directory("s3://toil-datasets/hello.txt")
47
+
48
+ @needs_aws_s3
49
+ def test_list_url(self) -> None:
50
+ test_dir = URLAccess.list_url("s3://1000genomes/")
51
+ assert isinstance(test_dir, list)
52
+ assert len(test_dir) > 0
53
+
54
+ @needs_aws_s3
55
+ def test_read_from_url(self) -> None:
56
+ import io
57
+ output = io.BytesIO()
58
+ size, executable = URLAccess.read_from_url("s3://toil-datasets/hello.txt", output)
59
+ assert isinstance(size, int)
60
+ assert size > 0
61
+ assert not executable
62
+ assert len(output.getvalue()) > 0
63
+
64
+ @needs_aws_s3
65
+ def test_open_url(self) -> None:
66
+ with URLAccess.open_url("s3://toil-datasets/hello.txt") as readable:
67
+ content = readable.read()
68
+ assert isinstance(content, bytes)
69
+ assert len(content) > 0
@@ -0,0 +1,105 @@
1
+ # Copyright (C) 2015-2025 Regents of the University of California
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import logging
15
+ from typing import IO, Optional, Union
16
+
17
+ from configargparse import ArgParser, ArgumentParser
18
+
19
+ from toil.batchSystems.abstractBatchSystem import (
20
+ AbstractBatchSystem,
21
+ UpdatedBatchJobInfo,
22
+ )
23
+ from toil.batchSystems.cleanup_support import BatchSystemCleanupSupport
24
+ from toil.batchSystems.options import OptionSetter
25
+ from toil.batchSystems.registry import add_batch_system_factory
26
+ from toil.common import Toil, addOptions
27
+ from toil.job import JobDescription
28
+
29
+ import io
30
+ from urllib.parse import ParseResult
31
+ from toil.test import ToilTest
32
+ from toil.lib.url import URLAccess
33
+ from toil.lib.plugins import register_plugin, remove_plugin
34
+
35
+ logger = logging.getLogger(__name__)
36
+
37
+ class FakeURLPlugin(URLAccess):
38
+ @classmethod
39
+ def _supports_url(cls, url: ParseResult, export: bool = False) -> bool:
40
+ return url.scheme == "fake"
41
+
42
+ @classmethod
43
+ def _url_exists(cls, url: ParseResult) -> bool:
44
+ return url.netloc == "exists"
45
+
46
+ @classmethod
47
+ def _get_size(cls, url: ParseResult) -> int:
48
+ return 1234
49
+
50
+ @classmethod
51
+ def _get_is_directory(cls, url: ParseResult) -> bool:
52
+ return url.path.endswith("/")
53
+
54
+ @classmethod
55
+ def _list_url(cls, url: ParseResult) -> list[str]:
56
+ return ["file1.txt", "subdir/"]
57
+
58
+ @classmethod
59
+ def _read_from_url(cls, url: ParseResult, writable: IO[bytes]) -> tuple[int, bool]:
60
+ content = b"hello world"
61
+ writable.write(content)
62
+ return len(content), False
63
+
64
+ @classmethod
65
+ def _open_url(cls, url: ParseResult) -> IO[bytes]:
66
+ return io.BytesIO(b"hello world")
67
+
68
+ @classmethod
69
+ def _write_to_url(cls, readable: Union[IO[bytes], IO[str]], url: ParseResult, executable: bool = False) -> None:
70
+ pass
71
+
72
+
73
+ class TestURLAccess(ToilTest):
74
+ def setUp(self) -> None:
75
+ super().setUp()
76
+ register_plugin("url_access", "fake", lambda: FakeURLPlugin)
77
+
78
+ def tearDown(self) -> None:
79
+ remove_plugin("url_access", "fake")
80
+ super().tearDown()
81
+
82
+ def test_url_exists(self) -> None:
83
+ assert URLAccess.url_exists("fake://exists/resource") == True
84
+ assert URLAccess.url_exists("fake://missing/resource") == False
85
+
86
+ def test_get_size(self) -> None:
87
+ assert URLAccess.get_size("fake://any/resource") == 1234
88
+
89
+ def test_get_is_directory(self) -> None:
90
+ assert URLAccess.get_is_directory("fake://any/folder/") == True
91
+ assert URLAccess.get_is_directory("fake://any/file.txt") == False
92
+
93
+ def test_list_url(self) -> None:
94
+ assert URLAccess.list_url("fake://any/folder/") == ["file1.txt", "subdir/"]
95
+
96
+ def test_read_from_url(self) -> None:
97
+ output = io.BytesIO()
98
+ size, _ = URLAccess.read_from_url("fake://any/resource", output)
99
+ assert output.getvalue() == b"hello world"
100
+ assert size == len("hello world")
101
+
102
+ def test_open_url(self) -> None:
103
+ with URLAccess.open_url("fake://any/resource") as stream:
104
+ content = stream.read()
105
+ assert content == b"hello world"
@@ -526,7 +526,7 @@ class AWSAutoscaleTestMultipleNodeTypes(AbstractAWSAutoscaleTest):
526
526
  runCommand = [
527
527
  self.python(),
528
528
  self.script(),
529
- "--fileToSort=/home/s3am/bin/asadmin",
529
+ "--fileToSort=/etc/passwd",
530
530
  "--sortMemory=0.6G",
531
531
  "--mergeMemory=3.0G",
532
532
  ]