yellowdog-python-examples 7.17.4__tar.gz → 7.18.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. {yellowdog_python_examples-7.17.4/yellowdog_python_examples.egg-info → yellowdog_python_examples-7.18.0}/PKG-INFO +2 -2
  2. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/README.md +18 -0
  3. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/requirements.txt +1 -1
  4. yellowdog_python_examples-7.18.0/yellowdog_cli/__init__.py +1 -0
  5. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/compare.py +62 -21
  6. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/submit.py +7 -0
  7. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/args.py +12 -0
  8. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/config_types.py +1 -0
  9. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/load_config.py +3 -0
  10. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/property_names.py +9 -0
  11. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/submit_utils.py +79 -0
  12. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0/yellowdog_python_examples.egg-info}/PKG-INFO +2 -2
  13. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_python_examples.egg-info/requires.txt +1 -1
  14. yellowdog_python_examples-7.17.4/yellowdog_cli/__init__.py +0 -1
  15. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/LICENSE +0 -0
  16. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/PYPI_README.md +0 -0
  17. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/pyproject.toml +0 -0
  18. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/setup.cfg +0 -0
  19. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/tests/test_create_remove.py +0 -0
  20. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/tests/test_demos.py +0 -0
  21. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/tests/test_dryruns.py +0 -0
  22. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/tests/test_entrypoints.py +0 -0
  23. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/tests/test_gui.py +0 -0
  24. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/tests/test_list.py +0 -0
  25. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/tests/test_objects.py +0 -0
  26. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/tests/test_variable_processing.py +0 -0
  27. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/abort.py +0 -0
  28. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/admin.py +0 -0
  29. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/boost.py +0 -0
  30. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/cancel.py +0 -0
  31. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/cloudwizard.py +0 -0
  32. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/create.py +0 -0
  33. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/delete.py +0 -0
  34. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/download.py +0 -0
  35. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/follow.py +0 -0
  36. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/format_json.py +0 -0
  37. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/hold.py +0 -0
  38. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/instantiate.py +0 -0
  39. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/jsonnet2json.py +0 -0
  40. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/list.py +0 -0
  41. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/provision.py +0 -0
  42. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/remove.py +0 -0
  43. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/resize.py +0 -0
  44. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/show.py +0 -0
  45. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/shutdown.py +0 -0
  46. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/start.py +0 -0
  47. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/terminate.py +0 -0
  48. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/upload.py +0 -0
  49. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/__init__.py +0 -0
  50. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/check_imports.py +0 -0
  51. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/cloudwizard_aws.py +0 -0
  52. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/cloudwizard_aws_types.py +0 -0
  53. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/cloudwizard_azure.py +0 -0
  54. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/cloudwizard_common.py +0 -0
  55. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/cloudwizard_gcp.py +0 -0
  56. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/compact_json.py +0 -0
  57. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/csv_data.py +0 -0
  58. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/entity_utils.py +0 -0
  59. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/follow_utils.py +0 -0
  60. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/interactive.py +0 -0
  61. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/items.py +0 -0
  62. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/load_resources.py +0 -0
  63. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/misc_utils.py +0 -0
  64. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/printing.py +0 -0
  65. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/provision_utils.py +0 -0
  66. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/rich_console_input_fixed.py +0 -0
  67. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/settings.py +0 -0
  68. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/start_hold_common.py +0 -0
  69. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/type_check.py +0 -0
  70. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/upload_utils.py +0 -0
  71. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/validate_properties.py +0 -0
  72. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/variables.py +0 -0
  73. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/wrapper.py +0 -0
  74. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/utils/ydid_utils.py +0 -0
  75. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_cli/version.py +0 -0
  76. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_python_examples.egg-info/SOURCES.txt +0 -0
  77. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_python_examples.egg-info/dependency_links.txt +0 -0
  78. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_python_examples.egg-info/entry_points.txt +0 -0
  79. {yellowdog_python_examples-7.17.4 → yellowdog_python_examples-7.18.0}/yellowdog_python_examples.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: yellowdog-python-examples
3
- Version: 7.17.4
3
+ Version: 7.18.0
4
4
  Summary: Python CLI commands using the YellowDog Python SDK
5
5
  Author-email: YellowDog Limited <support@yellowdog.co>
6
6
  Project-URL: Homepage, https://github.com/yellowdog/python-examples
@@ -19,7 +19,7 @@ Requires-Dist: requests
19
19
  Requires-Dist: rich==13.9.4
20
20
  Requires-Dist: tabulate>=0.9.0
21
21
  Requires-Dist: toml
22
- Requires-Dist: yellowdog-sdk>=9.2.1
22
+ Requires-Dist: yellowdog-sdk>=9.2.3
23
23
  Provides-Extra: jsonnet
24
24
  Requires-Dist: jsonnet; extra == "jsonnet"
25
25
  Provides-Extra: cloudwizard
@@ -630,6 +630,7 @@ All properties are optional except for **`taskType`** (or **`taskTypes`**).
630
630
  | `taskName` | The name to use for the Task. Only usable in the TOML file. Mostly useful in conjunction with CSV Task data. E.g., `"my_task_number_{{task_number}}"`. | Yes | | | |
631
631
  | `taskGroupCount` | Create `taskGroupCount` duplicates of a single Task Group. | Yes | Yes | | |
632
632
  | `taskGroupName` | The name to use for the Task Group. Only usable in the TOML file. E.g., `"my_tg_number_{{task_group_number}}"`. | Yes | | | |
633
+ | `taskRetryErrorMatchers` | A list of error condition combinations under which Tasks will be retried (up to `maximumTaskRetries`). Retries will always be attempted if the list is empty (the default). See the TOML/JSON section for examples. | Yes | Yes | Yes | |
633
634
  | `taskTimeout` | The timeout in minutes after which an executing Task will be terminated and reported as `FAILED`. E.g. `120.0`. The default is no timeout. | Yes | Yes | Yes | |
634
635
  | `timeout` | As above, but set at the individual Task level, which overrides the group level `taskTimeout` property (if present). | Yes | | | Yes |
635
636
  | `taskType` | The Task Type of a Task. E.g., `"docker"`. | Yes | | | Yes |
@@ -843,6 +844,9 @@ Here's an example of the `workRequirement` section of a TOML configuration file,
843
844
  taskTimeout = 120.0
844
845
  taskType = "docker"
845
846
  tasksPerWorker = 1
847
+ taskRetryErrorMatchers = [
848
+ {taskRetryErrorExitCodes = [143], taskRetryErrorStatuses = ["FAILED"], taskRetryErrorTypes = ["ALLOCATION_LOST"]},
849
+ ]
846
850
  uploadFiles = [{localPath = "file_1.txt", uploadPath = "file_1.txt"}]
847
851
  uploadTaskProcessOutput = true
848
852
  vcpus = [1, 4]
@@ -902,6 +906,13 @@ Showing all possible properties at the Work Requirement level:
902
906
  {"alwaysUpload": false, "destination": "dest_path_2", "source": "out_src_path_2"}
903
907
  ],
904
908
  "taskGroupCount": 5,
909
+ "taskRetryErrorMatchers": [
910
+ {
911
+ "taskRetryErrorExitCodes": [143],
912
+ "taskRetryErrorStatuses" : ["FAILED"],
913
+ "taskRetryErrorTypes": ["ALLOCATION_LOST"]
914
+ }
915
+ ],
905
916
  "taskTimeout": 120.0,
906
917
  "taskTypes": ["docker"],
907
918
  "tasksPerWorker": 1,
@@ -972,6 +983,13 @@ Showing all possible properties at the Task Group level:
972
983
  {"alwaysUpload": true, "destination": "dest_path_1", "source": "out_src_path_1"},
973
984
  {"alwaysUpload": false, "destination": "dest_path_2", "source": "out_src_path_2"}
974
985
  ],
986
+ "taskRetryErrorMatchers": [
987
+ {
988
+ "taskRetryErrorExitCodes": [143],
989
+ "taskRetryErrorStatuses" : ["FAILED"],
990
+ "taskRetryErrorTypes": ["ALLOCATION_LOST"]
991
+ }
992
+ ],
975
993
  "taskTimeout": 120.0,
976
994
  "taskTypes": ["docker"],
977
995
  "tasksPerWorker": 1,
@@ -4,4 +4,4 @@ requests
4
4
  rich == 13.9.4
5
5
  tabulate >= 0.9.0
6
6
  toml
7
- yellowdog-sdk >= 9.2.1
7
+ yellowdog-sdk >= 9.2.3
@@ -0,0 +1 @@
1
+ __version__ = "7.18.0"
@@ -8,13 +8,14 @@ and check for matches.
8
8
  from dataclasses import dataclass
9
9
  from enum import Enum
10
10
  from functools import cache
11
- from typing import List
11
+ from typing import List, Optional
12
12
 
13
13
  from tabulate import tabulate
14
14
  from yellowdog_client.model import (
15
15
  DoubleRange,
16
16
  Node,
17
17
  NodeSearch,
18
+ NodeStatus,
18
19
  TaskGroup,
19
20
  WorkerPool,
20
21
  WorkRequirement,
@@ -38,8 +39,8 @@ UNKNOWN_STRING = "NOT YET KNOWN"
38
39
  class MatchType(Enum):
39
40
  YES = "YES" # Definite match to the worker pool (so far)
40
41
  NO = "NO" # Definite non-match to the worker pool
41
- MAYBE = "MAYBE (no Nodes have registered yet)" # Possible match to the worker pool; no nodes yet
42
- PARTIAL = "PARTIAL (at least one Node matches, but not all)" # Some nodes in the worker pool match
42
+ MAYBE = "MAYBE (no Nodes available)" # Possible match to the worker pool; no nodes available
43
+ PARTIAL = "PARTIAL (one or more Nodes match, but not all)" # Some nodes in the worker pool match
43
44
 
44
45
 
45
46
  @dataclass
@@ -48,6 +49,8 @@ class PropertyMatch:
48
49
  task_group_values: str
49
50
  worker_pool_values: str
50
51
  match: MatchType
52
+ match_count: Optional[int] = None
53
+ total_nodes: Optional[int] = None
51
54
 
52
55
 
53
56
  class MatchReport:
@@ -151,7 +154,14 @@ class MatchReport:
151
154
  p.property_name,
152
155
  p.task_group_values,
153
156
  p.worker_pool_values,
154
- p.match.value,
157
+ (
158
+ f"{p.match.value}"
159
+ + (
160
+ f" ({p.match_count}/{p.total_nodes})"
161
+ if p.match_count is not None
162
+ else ""
163
+ )
164
+ ),
155
165
  ]
156
166
  )
157
167
 
@@ -217,7 +227,7 @@ class WorkerPools:
217
227
  else ", ".join(task_group.runSpecification.workerTags)
218
228
  ),
219
229
  worker_pool_values=(
220
- NONE_STRING
230
+ EMPTY_STRING
221
231
  if worker_pool.properties.workerTag is None
222
232
  else worker_pool.properties.workerTag
223
233
  ),
@@ -248,7 +258,7 @@ class WorkerPools:
248
258
  if node.details.instanceType != ""
249
259
  }
250
260
  worker_pool_values = (
251
- UNKNOWN_STRING
261
+ EMPTY_STRING
252
262
  if len(nodes) == 0
253
263
  else (
254
264
  ", ".join(node_instance_types)
@@ -258,12 +268,12 @@ class WorkerPools:
258
268
  )
259
269
 
260
270
  # Calculate match
271
+ matching_nodes_counter = 0
261
272
  if len(runspec_instance_types) == 0:
262
273
  match_type = MatchType.YES
263
274
  elif len(nodes) == 0:
264
275
  match_type = MatchType.MAYBE
265
276
  else:
266
- matching_nodes_counter = 0
267
277
  for node in nodes:
268
278
  if node.details.instanceType in runspec_instance_types:
269
279
  matching_nodes_counter += 1
@@ -277,12 +287,16 @@ class WorkerPools:
277
287
  return PropertyMatch(
278
288
  property_name="Instance Type(s)",
279
289
  task_group_values=(
280
- NONE_STRING
290
+ EMPTY_STRING
281
291
  if task_group.runSpecification.instanceTypes is None
282
292
  else ", ".join(task_group.runSpecification.instanceTypes)
283
293
  ),
284
294
  worker_pool_values=worker_pool_values,
285
295
  match=match_type,
296
+ match_count=(
297
+ matching_nodes_counter if len(runspec_instance_types) > 0 else None
298
+ ),
299
+ total_nodes=len(nodes) if len(runspec_instance_types) > 0 else None,
286
300
  )
287
301
 
288
302
  def _match_task_types(
@@ -311,10 +325,10 @@ class WorkerPools:
311
325
  )
312
326
 
313
327
  # Calculate match
328
+ matching_node_counter = 0
314
329
  if len(nodes) == 0:
315
330
  match_type = MatchType.MAYBE
316
331
  else:
317
- matching_node_counter = 0
318
332
  for node in nodes:
319
333
  if runspec_task_types <= set(node.details.supportedTaskTypes):
320
334
  matching_node_counter += 1
@@ -334,6 +348,8 @@ class WorkerPools:
334
348
  ),
335
349
  worker_pool_values=worker_pool_values,
336
350
  match=match_type,
351
+ match_count=matching_node_counter,
352
+ total_nodes=len(nodes),
337
353
  )
338
354
 
339
355
  def _match_providers(
@@ -348,12 +364,12 @@ class WorkerPools:
348
364
  node_providers = {node.details.provider.value for node in nodes}
349
365
 
350
366
  # Calculate match
367
+ matching_node_counter = 0
351
368
  if len(runspec_providers) == 0:
352
369
  match_type = MatchType.YES
353
370
  elif len(nodes) == 0:
354
371
  match_type = MatchType.MAYBE
355
372
  else:
356
- matching_node_counter = 0
357
373
  for node in nodes:
358
374
  if node.details.provider in runspec_providers:
359
375
  matching_node_counter += 1
@@ -367,7 +383,7 @@ class WorkerPools:
367
383
  return PropertyMatch(
368
384
  property_name="Provider(s)",
369
385
  task_group_values=(
370
- NONE_STRING
386
+ EMPTY_STRING
371
387
  if task_group.runSpecification.providers is None
372
388
  else ", ".join([x.value for x in task_group.runSpecification.providers])
373
389
  ),
@@ -375,12 +391,14 @@ class WorkerPools:
375
391
  UNKNOWN_STRING
376
392
  if len(nodes) == 0
377
393
  else (
378
- NONE_STRING
394
+ EMPTY_STRING
379
395
  if len(node_providers) == 0
380
396
  else ", ".join(node_providers)
381
397
  )
382
398
  ),
383
399
  match=match_type,
400
+ match_count=matching_node_counter if len(runspec_providers) > 0 else None,
401
+ total_nodes=len(nodes) if len(runspec_providers) > 0 else None,
384
402
  )
385
403
 
386
404
  def _match_regions(
@@ -397,12 +415,12 @@ class WorkerPools:
397
415
  }
398
416
 
399
417
  # Calculate match
418
+ matching_node_counter = 0
400
419
  if len(runspec_regions) == 0:
401
420
  match_type = MatchType.YES
402
421
  elif len(nodes) == 0:
403
422
  match_type = MatchType.MAYBE
404
423
  else:
405
- matching_node_counter = 0
406
424
  for node in nodes:
407
425
  if node.details.region in runspec_regions:
408
426
  matching_node_counter += 1
@@ -416,16 +434,18 @@ class WorkerPools:
416
434
  return PropertyMatch(
417
435
  property_name="Region(s)",
418
436
  task_group_values=(
419
- NONE_STRING
437
+ EMPTY_STRING
420
438
  if task_group.runSpecification.regions is None
421
439
  else ", ".join(task_group.runSpecification.regions)
422
440
  ),
423
441
  worker_pool_values=(
424
- UNKNOWN_STRING
442
+ EMPTY_STRING
425
443
  if len(nodes) == 0
426
444
  else ", ".join(node_regions) if len(node_regions) > 0 else NONE_STRING
427
445
  ),
428
446
  match=match_type,
447
+ match_count=matching_node_counter if len(runspec_regions) > 0 else None,
448
+ total_nodes=len(nodes) if len(runspec_regions) > 0 else None,
429
449
  )
430
450
 
431
451
  @staticmethod
@@ -435,12 +455,12 @@ class WorkerPools:
435
455
  return PropertyMatch(
436
456
  property_name="Namespace(s)",
437
457
  task_group_values=(
438
- NONE_STRING
458
+ EMPTY_STRING
439
459
  if task_group.runSpecification.namespaces is None
440
460
  else ", ".join(task_group.runSpecification.namespaces)
441
461
  ),
442
462
  worker_pool_values=(
443
- NONE_STRING if worker_pool.namespace is None else worker_pool.namespace
463
+ EMPTY_STRING if worker_pool.namespace is None else worker_pool.namespace
444
464
  ),
445
465
  match=(
446
466
  MatchType.YES
@@ -458,12 +478,12 @@ class WorkerPools:
458
478
  nodes_ram = {node.details.ram for node in nodes}
459
479
 
460
480
  # Calculate match
481
+ matching_node_counter = 0
461
482
  if task_group.runSpecification.ram is None:
462
483
  match_type = MatchType.YES
463
484
  elif len(nodes) == 0:
464
485
  match_type = MatchType.MAYBE
465
486
  else:
466
- matching_node_counter = 0
467
487
  for node in nodes:
468
488
  if self._check_in_range(
469
489
  node.details.ram, task_group.runSpecification.ram
@@ -489,6 +509,12 @@ class WorkerPools:
489
509
  else ", ".join([str(node_ram) for node_ram in nodes_ram])
490
510
  ),
491
511
  match=match_type,
512
+ match_count=(
513
+ None
514
+ if task_group.runSpecification.ram is None
515
+ else matching_node_counter
516
+ ),
517
+ total_nodes=None if task_group.runSpecification.ram is None else len(nodes),
492
518
  )
493
519
 
494
520
  def _match_vcpus(
@@ -499,12 +525,12 @@ class WorkerPools:
499
525
  nodes_vcpus = {node.details.vcpus for node in nodes}
500
526
 
501
527
  # Calculate match
528
+ matching_node_counter = 0
502
529
  if task_group.runSpecification.vcpus is None:
503
530
  match_type = MatchType.YES
504
531
  elif len(nodes) == 0:
505
532
  match_type = MatchType.MAYBE
506
533
  else:
507
- matching_node_counter = 0
508
534
  for node in nodes:
509
535
  if self._check_in_range(
510
536
  node.details.vcpus, task_group.runSpecification.vcpus
@@ -530,6 +556,14 @@ class WorkerPools:
530
556
  else ", ".join([str(node_vcpus) for node_vcpus in nodes_vcpus])
531
557
  ),
532
558
  match=match_type,
559
+ match_count=(
560
+ None
561
+ if task_group.runSpecification.vcpus is None
562
+ else matching_node_counter
563
+ ),
564
+ total_nodes=(
565
+ None if task_group.runSpecification.vcpus is None else len(nodes)
566
+ ),
533
567
  )
534
568
 
535
569
  @staticmethod
@@ -551,9 +585,14 @@ class WorkerPools:
551
585
 
552
586
  def _get_all_nodes_in_worker_pool(self, worker_pool: WorkerPool) -> List[Node]:
553
587
  """
554
- Return all nodes in the worker pool.
588
+ Return all nodes in the worker pool. Optionally restrict to running nodes only.
555
589
  """
556
- return self._get_all_nodes_in_worker_pool_cached(worker_pool.id)
590
+ nodes = self._get_all_nodes_in_worker_pool_cached(worker_pool.id)
591
+ return (
592
+ [node for node in nodes if node.status == NodeStatus.RUNNING]
593
+ if ARGS_PARSER.running_nodes_only
594
+ else nodes
595
+ )
557
596
 
558
597
  @staticmethod
559
598
  @cache
@@ -684,6 +723,8 @@ def main():
684
723
  f"({work_requirement.id})",
685
724
  override_quiet=True,
686
725
  )
726
+ if ARGS_PARSER.running_nodes_only:
727
+ print_log("Comparing RUNNING nodes in the worker pool(s) only")
687
728
  for task_group in work_requirement.taskGroups:
688
729
  _compare_task_group(task_group, worker_pools)
689
730
 
@@ -23,6 +23,8 @@ from yellowdog_client.model import (
23
23
  RunSpecification,
24
24
  Task,
25
25
  TaskData,
26
+ TaskErrorMatcher,
27
+ TaskErrorType,
26
28
  TaskGroup,
27
29
  TaskInput,
28
30
  TaskInputSource,
@@ -54,6 +56,7 @@ from yellowdog_cli.utils.printing import (
54
56
  print_log,
55
57
  print_numbered_strings,
56
58
  print_warning,
59
+ print_yd_object,
57
60
  )
58
61
  from yellowdog_cli.utils.property_names import *
59
62
  from yellowdog_cli.utils.settings import (
@@ -66,6 +69,7 @@ from yellowdog_cli.utils.settings import (
66
69
  )
67
70
  from yellowdog_cli.utils.submit_utils import (
68
71
  UploadedFiles,
72
+ generate_task_error_matchers_list,
69
73
  generate_task_input_list,
70
74
  generate_taskdata_object,
71
75
  pause_between_batches,
@@ -524,6 +528,9 @@ def create_task_group(
524
528
  NAMESPACES, wr_data.get(NAMESPACES, config_wr.namespaces)
525
529
  )
526
530
  ),
531
+ retryableErrors=generate_task_error_matchers_list(
532
+ config_wr, wr_data, task_group_data
533
+ ),
527
534
  )
528
535
  ctttl_data = check_float_or_int(
529
536
  task_group_data.get(
@@ -1186,6 +1186,13 @@ class CLIParser:
1186
1186
  nargs="+",
1187
1187
  help="the YellowDog ID(s) of the worker pool(s) to compare",
1188
1188
  )
1189
+ parser.add_argument(
1190
+ "--running-nodes-only",
1191
+ "-R",
1192
+ action="store_true",
1193
+ required=False,
1194
+ help="only include RUNNING nodes in the specified worker pools in the comparison",
1195
+ )
1189
1196
 
1190
1197
  self.args = parser.parse_args()
1191
1198
 
@@ -1722,6 +1729,11 @@ class CLIParser:
1722
1729
  def terminate(self) -> Optional[bool]:
1723
1730
  return self.args.terminate
1724
1731
 
1732
+ @property
1733
+ @allow_missing_attribute
1734
+ def running_nodes_only(self) -> Optional[bool]:
1735
+ return self.args.running_nodes_only
1736
+
1725
1737
 
1726
1738
  def lookup_module_description(module_name: str) -> Optional[str]:
1727
1739
  """
@@ -63,6 +63,7 @@ class ConfigWorkRequirement:
63
63
  task_group_name: Optional[str] = None
64
64
  task_level_timeout: Optional[float] = None
65
65
  task_name: Optional[str] = None
66
+ task_retry_error_matchers: Optional[List[Dict]] = None
66
67
  task_timeout: Optional[float] = None
67
68
  task_type: Optional[str] = None
68
69
  tasks_per_worker: Optional[int] = None
@@ -318,6 +318,7 @@ def load_config_work_requirement() -> ConfigWorkRequirement:
318
318
  task_group_count=task_group_count,
319
319
  task_group_name=wr_section.get(TASK_GROUP_NAME, None),
320
320
  task_name=wr_section.get(TASK_NAME, None),
321
+ task_retry_error_matchers=wr_section.get(TASK_RETRY_ERROR_MATCHERS, None),
321
322
  task_timeout=wr_section.get(TASK_TIMEOUT, None),
322
323
  task_type=task_type,
323
324
  tasks_per_worker=wr_section.get(TASKS_PER_WORKER, None),
@@ -331,9 +332,11 @@ def load_config_work_requirement() -> ConfigWorkRequirement:
331
332
  wr_name=wr_section.get(WR_NAME, None),
332
333
  wr_tag=wr_section.get(WR_TAG, None),
333
334
  )
335
+
334
336
  except KeyError as e:
335
337
  print_error(f"Missing configuration data: {e}")
336
338
  exit(1)
339
+
337
340
  except Exception as e:
338
341
  print_error(f"{e}")
339
342
  exit(1)
@@ -79,6 +79,10 @@ TASK_GROUP_NAME = "taskGroupName" # String
79
79
  TASK_GROUP_TAG = "tag" # String
80
80
  TASK_LEVEL_TIMEOUT = "timeout" # Float
81
81
  TASK_NAME = "taskName" # String
82
+ TASK_RETRY_ERROR_EXIT_CODES = "taskRetryErrorExitCodes" # List of Ints
83
+ TASK_RETRY_ERROR_MATCHERS = "taskRetryErrorMatchers" # List of Dicts
84
+ TASK_RETRY_ERROR_STATUSES = "taskRetryErrorStatuses" # List of Strings
85
+ TASK_RETRY_ERROR_TYPES = "taskRetryErrorTypes" # List of Strings
82
86
  TASK_TAG = "tag" # String
83
87
  TASK_TIMEOUT = "taskTimeout" # Float
84
88
  TASK_TYPE = "taskType" # String
@@ -109,6 +113,7 @@ WR_DATA = "workRequirementData" # String
109
113
  WR_NAME = "name" # String
110
114
  WR_TAG = "tag" # String
111
115
 
116
+
112
117
  ALL_KEYS = [
113
118
  ADD_YD_ENV_VARS,
114
119
  ALWAYS_UPLOAD,
@@ -186,6 +191,10 @@ ALL_KEYS = [
186
191
  TASK_GROUP_TAG,
187
192
  TASK_LEVEL_TIMEOUT,
188
193
  TASK_NAME,
194
+ TASK_RETRY_ERROR_EXIT_CODES,
195
+ TASK_RETRY_ERROR_MATCHERS,
196
+ TASK_RETRY_ERROR_STATUSES,
197
+ TASK_RETRY_ERROR_TYPES,
189
198
  TASK_TAG,
190
199
  TASK_TIMEOUT,
191
200
  TASK_TYPE,
@@ -15,13 +15,22 @@ from yellowdog_client.model import (
15
15
  TaskData,
16
16
  TaskDataInput,
17
17
  TaskDataOutput,
18
+ TaskErrorMatcher,
18
19
  TaskInput,
19
20
  TaskInputVerification,
21
+ TaskStatus,
20
22
  )
21
23
 
22
24
  from yellowdog_cli.utils.config_types import ConfigCommon, ConfigWorkRequirement
23
25
  from yellowdog_cli.utils.printing import print_error, print_log
26
+ from yellowdog_cli.utils.property_names import (
27
+ TASK_RETRY_ERROR_EXIT_CODES,
28
+ TASK_RETRY_ERROR_MATCHERS,
29
+ TASK_RETRY_ERROR_STATUSES,
30
+ TASK_RETRY_ERROR_TYPES,
31
+ )
24
32
  from yellowdog_cli.utils.settings import NAMESPACE_OBJECT_STORE_PREFIX_SEPARATOR
33
+ from yellowdog_cli.utils.type_check import check_list
25
34
  from yellowdog_cli.utils.upload_utils import unique_upload_pathname, upload_file_core
26
35
  from yellowdog_cli.utils.variables import process_variable_substitutions_insitu
27
36
  from yellowdog_cli.utils.wrapper import ARGS_PARSER
@@ -331,3 +340,73 @@ def generate_taskdata_object(
331
340
  raise Exception(
332
341
  f"Unable to generate 'taskDataInputs' or 'taskDataOutputs' list: {str(e)}"
333
342
  )
343
+
344
+
345
+ def generate_task_error_matchers_list(
346
+ config_wr: ConfigWorkRequirement, wr_data: dict, tg_data: dict
347
+ ) -> Optional[List[TaskErrorMatcher]]:
348
+ """
349
+ Generate a list of TaskErrorMatcher objects.
350
+ """
351
+ error_matchers: Optional[List[Dict]] = check_list(
352
+ tg_data.get(
353
+ TASK_RETRY_ERROR_MATCHERS,
354
+ wr_data.get(TASK_RETRY_ERROR_MATCHERS, config_wr.task_retry_error_matchers),
355
+ )
356
+ )
357
+
358
+ return (
359
+ None
360
+ if error_matchers is None
361
+ else [
362
+ _generate_task_error_matcher(task_error_matcher_data)
363
+ for task_error_matcher_data in error_matchers
364
+ ]
365
+ )
366
+
367
+
368
+ def _generate_task_error_matcher(task_error_matcher_data: Dict) -> TaskErrorMatcher:
369
+ """
370
+ Generate a TaskErrorMatcher object.
371
+ """
372
+ try:
373
+
374
+ exit_codes_str: Optional[List[int]] = check_list(
375
+ task_error_matcher_data.get(TASK_RETRY_ERROR_EXIT_CODES, None)
376
+ )
377
+ try:
378
+ # Ensure ints
379
+ exit_codes = (
380
+ None
381
+ if exit_codes_str is None
382
+ else [int(exit_code_str) for exit_code_str in exit_codes_str]
383
+ )
384
+ except Exception as e:
385
+ raise Exception(f"Unable to process error exit codes: {e}")
386
+
387
+ statuses_str: Optional[List[str]] = check_list(
388
+ task_error_matcher_data.get(TASK_RETRY_ERROR_STATUSES, None)
389
+ )
390
+ try:
391
+ statuses = (
392
+ None
393
+ if statuses_str is None
394
+ else [TaskStatus(status) for status in statuses_str]
395
+ )
396
+ except Exception as e:
397
+ raise Exception(f"Unable to process error status: {e}")
398
+
399
+ error_types: Optional[List[str]] = check_list(
400
+ task_error_matcher_data.get(TASK_RETRY_ERROR_TYPES, None)
401
+ )
402
+
403
+ return TaskErrorMatcher(
404
+ errorTypes=error_types,
405
+ statusesAtFailure=statuses,
406
+ processExitCodes=exit_codes,
407
+ )
408
+
409
+ except Exception as e:
410
+ raise Exception(
411
+ f"Unable to process task retry error matcher data '{task_error_matcher_data}': {e}"
412
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: yellowdog-python-examples
3
- Version: 7.17.4
3
+ Version: 7.18.0
4
4
  Summary: Python CLI commands using the YellowDog Python SDK
5
5
  Author-email: YellowDog Limited <support@yellowdog.co>
6
6
  Project-URL: Homepage, https://github.com/yellowdog/python-examples
@@ -19,7 +19,7 @@ Requires-Dist: requests
19
19
  Requires-Dist: rich==13.9.4
20
20
  Requires-Dist: tabulate>=0.9.0
21
21
  Requires-Dist: toml
22
- Requires-Dist: yellowdog-sdk>=9.2.1
22
+ Requires-Dist: yellowdog-sdk>=9.2.3
23
23
  Provides-Extra: jsonnet
24
24
  Requires-Dist: jsonnet; extra == "jsonnet"
25
25
  Provides-Extra: cloudwizard
@@ -4,7 +4,7 @@ requests
4
4
  rich==13.9.4
5
5
  tabulate>=0.9.0
6
6
  toml
7
- yellowdog-sdk>=9.2.1
7
+ yellowdog-sdk>=9.2.3
8
8
 
9
9
  [cloudwizard]
10
10
  boto3
@@ -1 +0,0 @@
1
- __version__ = "7.17.4"