yellowdog-python-examples 7.17.3__tar.gz → 7.18.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. {yellowdog_python_examples-7.17.3/yellowdog_python_examples.egg-info → yellowdog_python_examples-7.18.0}/PKG-INFO +2 -2
  2. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/README.md +18 -0
  3. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/requirements.txt +1 -1
  4. yellowdog_python_examples-7.18.0/yellowdog_cli/__init__.py +1 -0
  5. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/compare.py +66 -24
  6. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/submit.py +7 -0
  7. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/args.py +12 -0
  8. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/config_types.py +1 -0
  9. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/load_config.py +3 -0
  10. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/printing.py +3 -0
  11. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/property_names.py +9 -0
  12. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/submit_utils.py +79 -0
  13. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0/yellowdog_python_examples.egg-info}/PKG-INFO +2 -2
  14. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_python_examples.egg-info/requires.txt +1 -1
  15. yellowdog_python_examples-7.17.3/yellowdog_cli/__init__.py +0 -1
  16. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/LICENSE +0 -0
  17. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/PYPI_README.md +0 -0
  18. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/pyproject.toml +0 -0
  19. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/setup.cfg +0 -0
  20. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/tests/test_create_remove.py +0 -0
  21. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/tests/test_demos.py +0 -0
  22. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/tests/test_dryruns.py +0 -0
  23. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/tests/test_entrypoints.py +0 -0
  24. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/tests/test_gui.py +0 -0
  25. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/tests/test_list.py +0 -0
  26. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/tests/test_objects.py +0 -0
  27. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/tests/test_variable_processing.py +0 -0
  28. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/abort.py +0 -0
  29. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/admin.py +0 -0
  30. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/boost.py +0 -0
  31. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/cancel.py +0 -0
  32. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/cloudwizard.py +0 -0
  33. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/create.py +0 -0
  34. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/delete.py +0 -0
  35. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/download.py +0 -0
  36. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/follow.py +0 -0
  37. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/format_json.py +0 -0
  38. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/hold.py +0 -0
  39. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/instantiate.py +0 -0
  40. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/jsonnet2json.py +0 -0
  41. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/list.py +0 -0
  42. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/provision.py +0 -0
  43. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/remove.py +0 -0
  44. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/resize.py +0 -0
  45. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/show.py +0 -0
  46. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/shutdown.py +0 -0
  47. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/start.py +0 -0
  48. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/terminate.py +0 -0
  49. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/upload.py +0 -0
  50. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/__init__.py +0 -0
  51. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/check_imports.py +0 -0
  52. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/cloudwizard_aws.py +0 -0
  53. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/cloudwizard_aws_types.py +0 -0
  54. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/cloudwizard_azure.py +0 -0
  55. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/cloudwizard_common.py +0 -0
  56. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/cloudwizard_gcp.py +0 -0
  57. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/compact_json.py +0 -0
  58. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/csv_data.py +0 -0
  59. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/entity_utils.py +0 -0
  60. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/follow_utils.py +0 -0
  61. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/interactive.py +0 -0
  62. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/items.py +0 -0
  63. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/load_resources.py +0 -0
  64. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/misc_utils.py +0 -0
  65. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/provision_utils.py +0 -0
  66. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/rich_console_input_fixed.py +0 -0
  67. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/settings.py +0 -0
  68. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/start_hold_common.py +0 -0
  69. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/type_check.py +0 -0
  70. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/upload_utils.py +0 -0
  71. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/validate_properties.py +0 -0
  72. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/variables.py +0 -0
  73. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/wrapper.py +0 -0
  74. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/ydid_utils.py +0 -0
  75. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_cli/version.py +0 -0
  76. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_python_examples.egg-info/SOURCES.txt +0 -0
  77. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_python_examples.egg-info/dependency_links.txt +0 -0
  78. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_python_examples.egg-info/entry_points.txt +0 -0
  79. {yellowdog_python_examples-7.17.3 → yellowdog_python_examples-7.18.0}/yellowdog_python_examples.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: yellowdog-python-examples
3
- Version: 7.17.3
3
+ Version: 7.18.0
4
4
  Summary: Python CLI commands using the YellowDog Python SDK
5
5
  Author-email: YellowDog Limited <support@yellowdog.co>
6
6
  Project-URL: Homepage, https://github.com/yellowdog/python-examples
@@ -19,7 +19,7 @@ Requires-Dist: requests
19
19
  Requires-Dist: rich==13.9.4
20
20
  Requires-Dist: tabulate>=0.9.0
21
21
  Requires-Dist: toml
22
- Requires-Dist: yellowdog-sdk>=9.2.0
22
+ Requires-Dist: yellowdog-sdk>=9.2.3
23
23
  Provides-Extra: jsonnet
24
24
  Requires-Dist: jsonnet; extra == "jsonnet"
25
25
  Provides-Extra: cloudwizard
@@ -630,6 +630,7 @@ All properties are optional except for **`taskType`** (or **`taskTypes`**).
630
630
  | `taskName` | The name to use for the Task. Only usable in the TOML file. Mostly useful in conjunction with CSV Task data. E.g., `"my_task_number_{{task_number}}"`. | Yes | | | |
631
631
  | `taskGroupCount` | Create `taskGroupCount` duplicates of a single Task Group. | Yes | Yes | | |
632
632
  | `taskGroupName` | The name to use for the Task Group. Only usable in the TOML file. E.g., `"my_tg_number_{{task_group_number}}"`. | Yes | | | |
633
+ | `taskRetryErrorMatchers` | A list of error condition combinations under which Tasks will be retried (up to `maximumTaskRetries`). Retries will always be attempted if the list is empty (the default). See the TOML/JSON section for examples. | Yes | Yes | Yes | |
633
634
  | `taskTimeout` | The timeout in minutes after which an executing Task will be terminated and reported as `FAILED`. E.g. `120.0`. The default is no timeout. | Yes | Yes | Yes | |
634
635
  | `timeout` | As above, but set at the individual Task level, which overrides the group level `taskTimeout` property (if present). | Yes | | | Yes |
635
636
  | `taskType` | The Task Type of a Task. E.g., `"docker"`. | Yes | | | Yes |
@@ -843,6 +844,9 @@ Here's an example of the `workRequirement` section of a TOML configuration file,
843
844
  taskTimeout = 120.0
844
845
  taskType = "docker"
845
846
  tasksPerWorker = 1
847
+ taskRetryErrorMatchers = [
848
+ {taskRetryErrorExitCodes = [143], taskRetryErrorStatuses = ["FAILED"], taskRetryErrorTypes = ["ALLOCATION_LOST"]},
849
+ ]
846
850
  uploadFiles = [{localPath = "file_1.txt", uploadPath = "file_1.txt"}]
847
851
  uploadTaskProcessOutput = true
848
852
  vcpus = [1, 4]
@@ -902,6 +906,13 @@ Showing all possible properties at the Work Requirement level:
902
906
  {"alwaysUpload": false, "destination": "dest_path_2", "source": "out_src_path_2"}
903
907
  ],
904
908
  "taskGroupCount": 5,
909
+ "taskRetryErrorMatchers": [
910
+ {
911
+ "taskRetryErrorExitCodes": [143],
912
+ "taskRetryErrorStatuses" : ["FAILED"],
913
+ "taskRetryErrorTypes": ["ALLOCATION_LOST"]
914
+ }
915
+ ],
905
916
  "taskTimeout": 120.0,
906
917
  "taskTypes": ["docker"],
907
918
  "tasksPerWorker": 1,
@@ -972,6 +983,13 @@ Showing all possible properties at the Task Group level:
972
983
  {"alwaysUpload": true, "destination": "dest_path_1", "source": "out_src_path_1"},
973
984
  {"alwaysUpload": false, "destination": "dest_path_2", "source": "out_src_path_2"}
974
985
  ],
986
+ "taskRetryErrorMatchers": [
987
+ {
988
+ "taskRetryErrorExitCodes": [143],
989
+ "taskRetryErrorStatuses" : ["FAILED"],
990
+ "taskRetryErrorTypes": ["ALLOCATION_LOST"]
991
+ }
992
+ ],
975
993
  "taskTimeout": 120.0,
976
994
  "taskTypes": ["docker"],
977
995
  "tasksPerWorker": 1,
@@ -4,4 +4,4 @@ requests
4
4
  rich == 13.9.4
5
5
  tabulate >= 0.9.0
6
6
  toml
7
- yellowdog-sdk >= 9.2.0
7
+ yellowdog-sdk >= 9.2.3
@@ -0,0 +1 @@
1
+ __version__ = "7.18.0"
@@ -8,13 +8,14 @@ and check for matches.
8
8
  from dataclasses import dataclass
9
9
  from enum import Enum
10
10
  from functools import cache
11
- from typing import List
11
+ from typing import List, Optional
12
12
 
13
13
  from tabulate import tabulate
14
14
  from yellowdog_client.model import (
15
15
  DoubleRange,
16
16
  Node,
17
17
  NodeSearch,
18
+ NodeStatus,
18
19
  TaskGroup,
19
20
  WorkerPool,
20
21
  WorkRequirement,
@@ -31,14 +32,15 @@ from yellowdog_cli.utils.wrapper import ARGS_PARSER, CLIENT, main_wrapper
31
32
  from yellowdog_cli.utils.ydid_utils import YDIDType, get_ydid_type
32
33
 
33
34
  NONE_STRING = "NONE"
35
+ EMPTY_STRING = ""
34
36
  UNKNOWN_STRING = "NOT YET KNOWN"
35
37
 
36
38
 
37
39
  class MatchType(Enum):
38
40
  YES = "YES" # Definite match to the worker pool (so far)
39
41
  NO = "NO" # Definite non-match to the worker pool
40
- MAYBE = "MAYBE (no Nodes have registered yet)" # Possible match to the worker pool; no nodes yet
41
- PARTIAL = "PARTIAL (at least one Node matches, but not all)" # Some nodes in the worker pool match
42
+ MAYBE = "MAYBE (no Nodes available)" # Possible match to the worker pool; no nodes available
43
+ PARTIAL = "PARTIAL (one or more Nodes match, but not all)" # Some nodes in the worker pool match
42
44
 
43
45
 
44
46
  @dataclass
@@ -47,6 +49,8 @@ class PropertyMatch:
47
49
  task_group_values: str
48
50
  worker_pool_values: str
49
51
  match: MatchType
52
+ match_count: Optional[int] = None
53
+ total_nodes: Optional[int] = None
50
54
 
51
55
 
52
56
  class MatchReport:
@@ -150,7 +154,14 @@ class MatchReport:
150
154
  p.property_name,
151
155
  p.task_group_values,
152
156
  p.worker_pool_values,
153
- p.match.value,
157
+ (
158
+ f"{p.match.value}"
159
+ + (
160
+ f" ({p.match_count}/{p.total_nodes})"
161
+ if p.match_count is not None
162
+ else ""
163
+ )
164
+ ),
154
165
  ]
155
166
  )
156
167
 
@@ -211,12 +222,12 @@ class WorkerPools:
211
222
  return PropertyMatch(
212
223
  property_name="Worker Tag(s)",
213
224
  task_group_values=(
214
- NONE_STRING
225
+ EMPTY_STRING
215
226
  if task_group.runSpecification.workerTags is None
216
227
  else ", ".join(task_group.runSpecification.workerTags)
217
228
  ),
218
229
  worker_pool_values=(
219
- NONE_STRING
230
+ EMPTY_STRING
220
231
  if worker_pool.properties.workerTag is None
221
232
  else worker_pool.properties.workerTag
222
233
  ),
@@ -247,7 +258,7 @@ class WorkerPools:
247
258
  if node.details.instanceType != ""
248
259
  }
249
260
  worker_pool_values = (
250
- UNKNOWN_STRING
261
+ EMPTY_STRING
251
262
  if len(nodes) == 0
252
263
  else (
253
264
  ", ".join(node_instance_types)
@@ -257,12 +268,12 @@ class WorkerPools:
257
268
  )
258
269
 
259
270
  # Calculate match
271
+ matching_nodes_counter = 0
260
272
  if len(runspec_instance_types) == 0:
261
273
  match_type = MatchType.YES
262
274
  elif len(nodes) == 0:
263
275
  match_type = MatchType.MAYBE
264
276
  else:
265
- matching_nodes_counter = 0
266
277
  for node in nodes:
267
278
  if node.details.instanceType in runspec_instance_types:
268
279
  matching_nodes_counter += 1
@@ -276,12 +287,16 @@ class WorkerPools:
276
287
  return PropertyMatch(
277
288
  property_name="Instance Type(s)",
278
289
  task_group_values=(
279
- NONE_STRING
290
+ EMPTY_STRING
280
291
  if task_group.runSpecification.instanceTypes is None
281
292
  else ", ".join(task_group.runSpecification.instanceTypes)
282
293
  ),
283
294
  worker_pool_values=worker_pool_values,
284
295
  match=match_type,
296
+ match_count=(
297
+ matching_nodes_counter if len(runspec_instance_types) > 0 else None
298
+ ),
299
+ total_nodes=len(nodes) if len(runspec_instance_types) > 0 else None,
285
300
  )
286
301
 
287
302
  def _match_task_types(
@@ -310,10 +325,10 @@ class WorkerPools:
310
325
  )
311
326
 
312
327
  # Calculate match
328
+ matching_node_counter = 0
313
329
  if len(nodes) == 0:
314
330
  match_type = MatchType.MAYBE
315
331
  else:
316
- matching_node_counter = 0
317
332
  for node in nodes:
318
333
  if runspec_task_types <= set(node.details.supportedTaskTypes):
319
334
  matching_node_counter += 1
@@ -333,6 +348,8 @@ class WorkerPools:
333
348
  ),
334
349
  worker_pool_values=worker_pool_values,
335
350
  match=match_type,
351
+ match_count=matching_node_counter,
352
+ total_nodes=len(nodes),
336
353
  )
337
354
 
338
355
  def _match_providers(
@@ -347,12 +364,12 @@ class WorkerPools:
347
364
  node_providers = {node.details.provider.value for node in nodes}
348
365
 
349
366
  # Calculate match
367
+ matching_node_counter = 0
350
368
  if len(runspec_providers) == 0:
351
369
  match_type = MatchType.YES
352
370
  elif len(nodes) == 0:
353
371
  match_type = MatchType.MAYBE
354
372
  else:
355
- matching_node_counter = 0
356
373
  for node in nodes:
357
374
  if node.details.provider in runspec_providers:
358
375
  matching_node_counter += 1
@@ -366,7 +383,7 @@ class WorkerPools:
366
383
  return PropertyMatch(
367
384
  property_name="Provider(s)",
368
385
  task_group_values=(
369
- NONE_STRING
386
+ EMPTY_STRING
370
387
  if task_group.runSpecification.providers is None
371
388
  else ", ".join([x.value for x in task_group.runSpecification.providers])
372
389
  ),
@@ -374,12 +391,14 @@ class WorkerPools:
374
391
  UNKNOWN_STRING
375
392
  if len(nodes) == 0
376
393
  else (
377
- NONE_STRING
394
+ EMPTY_STRING
378
395
  if len(node_providers) == 0
379
396
  else ", ".join(node_providers)
380
397
  )
381
398
  ),
382
399
  match=match_type,
400
+ match_count=matching_node_counter if len(runspec_providers) > 0 else None,
401
+ total_nodes=len(nodes) if len(runspec_providers) > 0 else None,
383
402
  )
384
403
 
385
404
  def _match_regions(
@@ -396,12 +415,12 @@ class WorkerPools:
396
415
  }
397
416
 
398
417
  # Calculate match
418
+ matching_node_counter = 0
399
419
  if len(runspec_regions) == 0:
400
420
  match_type = MatchType.YES
401
421
  elif len(nodes) == 0:
402
422
  match_type = MatchType.MAYBE
403
423
  else:
404
- matching_node_counter = 0
405
424
  for node in nodes:
406
425
  if node.details.region in runspec_regions:
407
426
  matching_node_counter += 1
@@ -415,16 +434,18 @@ class WorkerPools:
415
434
  return PropertyMatch(
416
435
  property_name="Region(s)",
417
436
  task_group_values=(
418
- NONE_STRING
437
+ EMPTY_STRING
419
438
  if task_group.runSpecification.regions is None
420
439
  else ", ".join(task_group.runSpecification.regions)
421
440
  ),
422
441
  worker_pool_values=(
423
- UNKNOWN_STRING
442
+ EMPTY_STRING
424
443
  if len(nodes) == 0
425
444
  else ", ".join(node_regions) if len(node_regions) > 0 else NONE_STRING
426
445
  ),
427
446
  match=match_type,
447
+ match_count=matching_node_counter if len(runspec_regions) > 0 else None,
448
+ total_nodes=len(nodes) if len(runspec_regions) > 0 else None,
428
449
  )
429
450
 
430
451
  @staticmethod
@@ -434,12 +455,12 @@ class WorkerPools:
434
455
  return PropertyMatch(
435
456
  property_name="Namespace(s)",
436
457
  task_group_values=(
437
- NONE_STRING
458
+ EMPTY_STRING
438
459
  if task_group.runSpecification.namespaces is None
439
460
  else ", ".join(task_group.runSpecification.namespaces)
440
461
  ),
441
462
  worker_pool_values=(
442
- NONE_STRING if worker_pool.namespace is None else worker_pool.namespace
463
+ EMPTY_STRING if worker_pool.namespace is None else worker_pool.namespace
443
464
  ),
444
465
  match=(
445
466
  MatchType.YES
@@ -457,12 +478,12 @@ class WorkerPools:
457
478
  nodes_ram = {node.details.ram for node in nodes}
458
479
 
459
480
  # Calculate match
481
+ matching_node_counter = 0
460
482
  if task_group.runSpecification.ram is None:
461
483
  match_type = MatchType.YES
462
484
  elif len(nodes) == 0:
463
485
  match_type = MatchType.MAYBE
464
486
  else:
465
- matching_node_counter = 0
466
487
  for node in nodes:
467
488
  if self._check_in_range(
468
489
  node.details.ram, task_group.runSpecification.ram
@@ -478,7 +499,7 @@ class WorkerPools:
478
499
  return PropertyMatch(
479
500
  property_name="RAM (GB)",
480
501
  task_group_values=(
481
- NONE_STRING
502
+ EMPTY_STRING
482
503
  if task_group.runSpecification.ram is None
483
504
  else self._doublerange_str(task_group.runSpecification.ram)
484
505
  ),
@@ -488,6 +509,12 @@ class WorkerPools:
488
509
  else ", ".join([str(node_ram) for node_ram in nodes_ram])
489
510
  ),
490
511
  match=match_type,
512
+ match_count=(
513
+ None
514
+ if task_group.runSpecification.ram is None
515
+ else matching_node_counter
516
+ ),
517
+ total_nodes=None if task_group.runSpecification.ram is None else len(nodes),
491
518
  )
492
519
 
493
520
  def _match_vcpus(
@@ -498,12 +525,12 @@ class WorkerPools:
498
525
  nodes_vcpus = {node.details.vcpus for node in nodes}
499
526
 
500
527
  # Calculate match
528
+ matching_node_counter = 0
501
529
  if task_group.runSpecification.vcpus is None:
502
530
  match_type = MatchType.YES
503
531
  elif len(nodes) == 0:
504
532
  match_type = MatchType.MAYBE
505
533
  else:
506
- matching_node_counter = 0
507
534
  for node in nodes:
508
535
  if self._check_in_range(
509
536
  node.details.vcpus, task_group.runSpecification.vcpus
@@ -519,7 +546,7 @@ class WorkerPools:
519
546
  return PropertyMatch(
520
547
  property_name="vCPUs Count",
521
548
  task_group_values=(
522
- NONE_STRING
549
+ EMPTY_STRING
523
550
  if task_group.runSpecification.vcpus is None
524
551
  else self._doublerange_str(task_group.runSpecification.vcpus)
525
552
  ),
@@ -529,6 +556,14 @@ class WorkerPools:
529
556
  else ", ".join([str(node_vcpus) for node_vcpus in nodes_vcpus])
530
557
  ),
531
558
  match=match_type,
559
+ match_count=(
560
+ None
561
+ if task_group.runSpecification.vcpus is None
562
+ else matching_node_counter
563
+ ),
564
+ total_nodes=(
565
+ None if task_group.runSpecification.vcpus is None else len(nodes)
566
+ ),
532
567
  )
533
568
 
534
569
  @staticmethod
@@ -550,9 +585,14 @@ class WorkerPools:
550
585
 
551
586
  def _get_all_nodes_in_worker_pool(self, worker_pool: WorkerPool) -> List[Node]:
552
587
  """
553
- Return all nodes in the worker pool.
588
+ Return all nodes in the worker pool. Optionally restrict to running nodes only.
554
589
  """
555
- return self._get_all_nodes_in_worker_pool_cached(worker_pool.id)
590
+ nodes = self._get_all_nodes_in_worker_pool_cached(worker_pool.id)
591
+ return (
592
+ [node for node in nodes if node.status == NodeStatus.RUNNING]
593
+ if ARGS_PARSER.running_nodes_only
594
+ else nodes
595
+ )
556
596
 
557
597
  @staticmethod
558
598
  @cache
@@ -683,6 +723,8 @@ def main():
683
723
  f"({work_requirement.id})",
684
724
  override_quiet=True,
685
725
  )
726
+ if ARGS_PARSER.running_nodes_only:
727
+ print_log("Comparing RUNNING nodes in the worker pool(s) only")
686
728
  for task_group in work_requirement.taskGroups:
687
729
  _compare_task_group(task_group, worker_pools)
688
730
 
@@ -23,6 +23,8 @@ from yellowdog_client.model import (
23
23
  RunSpecification,
24
24
  Task,
25
25
  TaskData,
26
+ TaskErrorMatcher,
27
+ TaskErrorType,
26
28
  TaskGroup,
27
29
  TaskInput,
28
30
  TaskInputSource,
@@ -54,6 +56,7 @@ from yellowdog_cli.utils.printing import (
54
56
  print_log,
55
57
  print_numbered_strings,
56
58
  print_warning,
59
+ print_yd_object,
57
60
  )
58
61
  from yellowdog_cli.utils.property_names import *
59
62
  from yellowdog_cli.utils.settings import (
@@ -66,6 +69,7 @@ from yellowdog_cli.utils.settings import (
66
69
  )
67
70
  from yellowdog_cli.utils.submit_utils import (
68
71
  UploadedFiles,
72
+ generate_task_error_matchers_list,
69
73
  generate_task_input_list,
70
74
  generate_taskdata_object,
71
75
  pause_between_batches,
@@ -524,6 +528,9 @@ def create_task_group(
524
528
  NAMESPACES, wr_data.get(NAMESPACES, config_wr.namespaces)
525
529
  )
526
530
  ),
531
+ retryableErrors=generate_task_error_matchers_list(
532
+ config_wr, wr_data, task_group_data
533
+ ),
527
534
  )
528
535
  ctttl_data = check_float_or_int(
529
536
  task_group_data.get(
@@ -1186,6 +1186,13 @@ class CLIParser:
1186
1186
  nargs="+",
1187
1187
  help="the YellowDog ID(s) of the worker pool(s) to compare",
1188
1188
  )
1189
+ parser.add_argument(
1190
+ "--running-nodes-only",
1191
+ "-R",
1192
+ action="store_true",
1193
+ required=False,
1194
+ help="only include RUNNING nodes in the specified worker pools in the comparison",
1195
+ )
1189
1196
 
1190
1197
  self.args = parser.parse_args()
1191
1198
 
@@ -1722,6 +1729,11 @@ class CLIParser:
1722
1729
  def terminate(self) -> Optional[bool]:
1723
1730
  return self.args.terminate
1724
1731
 
1732
+ @property
1733
+ @allow_missing_attribute
1734
+ def running_nodes_only(self) -> Optional[bool]:
1735
+ return self.args.running_nodes_only
1736
+
1725
1737
 
1726
1738
  def lookup_module_description(module_name: str) -> Optional[str]:
1727
1739
  """
@@ -63,6 +63,7 @@ class ConfigWorkRequirement:
63
63
  task_group_name: Optional[str] = None
64
64
  task_level_timeout: Optional[float] = None
65
65
  task_name: Optional[str] = None
66
+ task_retry_error_matchers: Optional[List[Dict]] = None
66
67
  task_timeout: Optional[float] = None
67
68
  task_type: Optional[str] = None
68
69
  tasks_per_worker: Optional[int] = None
@@ -318,6 +318,7 @@ def load_config_work_requirement() -> ConfigWorkRequirement:
318
318
  task_group_count=task_group_count,
319
319
  task_group_name=wr_section.get(TASK_GROUP_NAME, None),
320
320
  task_name=wr_section.get(TASK_NAME, None),
321
+ task_retry_error_matchers=wr_section.get(TASK_RETRY_ERROR_MATCHERS, None),
321
322
  task_timeout=wr_section.get(TASK_TIMEOUT, None),
322
323
  task_type=task_type,
323
324
  tasks_per_worker=wr_section.get(TASKS_PER_WORKER, None),
@@ -331,9 +332,11 @@ def load_config_work_requirement() -> ConfigWorkRequirement:
331
332
  wr_name=wr_section.get(WR_NAME, None),
332
333
  wr_tag=wr_section.get(WR_TAG, None),
333
334
  )
335
+
334
336
  except KeyError as e:
335
337
  print_error(f"Missing configuration data: {e}")
336
338
  exit(1)
339
+
337
340
  except Exception as e:
338
341
  print_error(f"{e}")
339
342
  exit(1)
@@ -997,6 +997,9 @@ def sorted_objects(
997
997
  except TypeError:
998
998
  return objects
999
999
 
1000
+ if isinstance(objects[0], Task): # Sort tasks by their task number
1001
+ return sorted(objects, key=lambda x: int(x.id.split(":")[-1]), reverse=reverse)
1002
+
1000
1003
  try:
1001
1004
  return sorted(objects, key=lambda x: x.name, reverse=reverse)
1002
1005
  except:
@@ -79,6 +79,10 @@ TASK_GROUP_NAME = "taskGroupName" # String
79
79
  TASK_GROUP_TAG = "tag" # String
80
80
  TASK_LEVEL_TIMEOUT = "timeout" # Float
81
81
  TASK_NAME = "taskName" # String
82
+ TASK_RETRY_ERROR_EXIT_CODES = "taskRetryErrorExitCodes" # List of Ints
83
+ TASK_RETRY_ERROR_MATCHERS = "taskRetryErrorMatchers" # List of Dicts
84
+ TASK_RETRY_ERROR_STATUSES = "taskRetryErrorStatuses" # List of Strings
85
+ TASK_RETRY_ERROR_TYPES = "taskRetryErrorTypes" # List of Strings
82
86
  TASK_TAG = "tag" # String
83
87
  TASK_TIMEOUT = "taskTimeout" # Float
84
88
  TASK_TYPE = "taskType" # String
@@ -109,6 +113,7 @@ WR_DATA = "workRequirementData" # String
109
113
  WR_NAME = "name" # String
110
114
  WR_TAG = "tag" # String
111
115
 
116
+
112
117
  ALL_KEYS = [
113
118
  ADD_YD_ENV_VARS,
114
119
  ALWAYS_UPLOAD,
@@ -186,6 +191,10 @@ ALL_KEYS = [
186
191
  TASK_GROUP_TAG,
187
192
  TASK_LEVEL_TIMEOUT,
188
193
  TASK_NAME,
194
+ TASK_RETRY_ERROR_EXIT_CODES,
195
+ TASK_RETRY_ERROR_MATCHERS,
196
+ TASK_RETRY_ERROR_STATUSES,
197
+ TASK_RETRY_ERROR_TYPES,
189
198
  TASK_TAG,
190
199
  TASK_TIMEOUT,
191
200
  TASK_TYPE,
@@ -15,13 +15,22 @@ from yellowdog_client.model import (
15
15
  TaskData,
16
16
  TaskDataInput,
17
17
  TaskDataOutput,
18
+ TaskErrorMatcher,
18
19
  TaskInput,
19
20
  TaskInputVerification,
21
+ TaskStatus,
20
22
  )
21
23
 
22
24
  from yellowdog_cli.utils.config_types import ConfigCommon, ConfigWorkRequirement
23
25
  from yellowdog_cli.utils.printing import print_error, print_log
26
+ from yellowdog_cli.utils.property_names import (
27
+ TASK_RETRY_ERROR_EXIT_CODES,
28
+ TASK_RETRY_ERROR_MATCHERS,
29
+ TASK_RETRY_ERROR_STATUSES,
30
+ TASK_RETRY_ERROR_TYPES,
31
+ )
24
32
  from yellowdog_cli.utils.settings import NAMESPACE_OBJECT_STORE_PREFIX_SEPARATOR
33
+ from yellowdog_cli.utils.type_check import check_list
25
34
  from yellowdog_cli.utils.upload_utils import unique_upload_pathname, upload_file_core
26
35
  from yellowdog_cli.utils.variables import process_variable_substitutions_insitu
27
36
  from yellowdog_cli.utils.wrapper import ARGS_PARSER
@@ -331,3 +340,73 @@ def generate_taskdata_object(
331
340
  raise Exception(
332
341
  f"Unable to generate 'taskDataInputs' or 'taskDataOutputs' list: {str(e)}"
333
342
  )
343
+
344
+
345
+ def generate_task_error_matchers_list(
346
+ config_wr: ConfigWorkRequirement, wr_data: dict, tg_data: dict
347
+ ) -> Optional[List[TaskErrorMatcher]]:
348
+ """
349
+ Generate a list of TaskErrorMatcher objects.
350
+ """
351
+ error_matchers: Optional[List[Dict]] = check_list(
352
+ tg_data.get(
353
+ TASK_RETRY_ERROR_MATCHERS,
354
+ wr_data.get(TASK_RETRY_ERROR_MATCHERS, config_wr.task_retry_error_matchers),
355
+ )
356
+ )
357
+
358
+ return (
359
+ None
360
+ if error_matchers is None
361
+ else [
362
+ _generate_task_error_matcher(task_error_matcher_data)
363
+ for task_error_matcher_data in error_matchers
364
+ ]
365
+ )
366
+
367
+
368
+ def _generate_task_error_matcher(task_error_matcher_data: Dict) -> TaskErrorMatcher:
369
+ """
370
+ Generate a TaskErrorMatcher object.
371
+ """
372
+ try:
373
+
374
+ exit_codes_str: Optional[List[int]] = check_list(
375
+ task_error_matcher_data.get(TASK_RETRY_ERROR_EXIT_CODES, None)
376
+ )
377
+ try:
378
+ # Ensure ints
379
+ exit_codes = (
380
+ None
381
+ if exit_codes_str is None
382
+ else [int(exit_code_str) for exit_code_str in exit_codes_str]
383
+ )
384
+ except Exception as e:
385
+ raise Exception(f"Unable to process error exit codes: {e}")
386
+
387
+ statuses_str: Optional[List[str]] = check_list(
388
+ task_error_matcher_data.get(TASK_RETRY_ERROR_STATUSES, None)
389
+ )
390
+ try:
391
+ statuses = (
392
+ None
393
+ if statuses_str is None
394
+ else [TaskStatus(status) for status in statuses_str]
395
+ )
396
+ except Exception as e:
397
+ raise Exception(f"Unable to process error status: {e}")
398
+
399
+ error_types: Optional[List[str]] = check_list(
400
+ task_error_matcher_data.get(TASK_RETRY_ERROR_TYPES, None)
401
+ )
402
+
403
+ return TaskErrorMatcher(
404
+ errorTypes=error_types,
405
+ statusesAtFailure=statuses,
406
+ processExitCodes=exit_codes,
407
+ )
408
+
409
+ except Exception as e:
410
+ raise Exception(
411
+ f"Unable to process task retry error matcher data '{task_error_matcher_data}': {e}"
412
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: yellowdog-python-examples
3
- Version: 7.17.3
3
+ Version: 7.18.0
4
4
  Summary: Python CLI commands using the YellowDog Python SDK
5
5
  Author-email: YellowDog Limited <support@yellowdog.co>
6
6
  Project-URL: Homepage, https://github.com/yellowdog/python-examples
@@ -19,7 +19,7 @@ Requires-Dist: requests
19
19
  Requires-Dist: rich==13.9.4
20
20
  Requires-Dist: tabulate>=0.9.0
21
21
  Requires-Dist: toml
22
- Requires-Dist: yellowdog-sdk>=9.2.0
22
+ Requires-Dist: yellowdog-sdk>=9.2.3
23
23
  Provides-Extra: jsonnet
24
24
  Requires-Dist: jsonnet; extra == "jsonnet"
25
25
  Provides-Extra: cloudwizard
@@ -4,7 +4,7 @@ requests
4
4
  rich==13.9.4
5
5
  tabulate>=0.9.0
6
6
  toml
7
- yellowdog-sdk>=9.2.0
7
+ yellowdog-sdk>=9.2.3
8
8
 
9
9
  [cloudwizard]
10
10
  boto3
@@ -1 +0,0 @@
1
- __version__ = "7.17.3"