torchx-nightly 2023.10.21__py3-none-any.whl → 2025.12.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of torchx-nightly might be problematic.

Files changed (110)
  1. torchx/__init__.py +2 -0
  2. torchx/{schedulers/ray/__init__.py → _version.py} +3 -1
  3. torchx/apps/serve/serve.py +2 -0
  4. torchx/apps/utils/booth_main.py +2 -0
  5. torchx/apps/utils/copy_main.py +2 -0
  6. torchx/apps/utils/process_monitor.py +2 -0
  7. torchx/cli/__init__.py +2 -0
  8. torchx/cli/argparse_util.py +38 -3
  9. torchx/cli/cmd_base.py +2 -0
  10. torchx/cli/cmd_cancel.py +2 -0
  11. torchx/cli/cmd_configure.py +2 -0
  12. torchx/cli/cmd_delete.py +30 -0
  13. torchx/cli/cmd_describe.py +2 -0
  14. torchx/cli/cmd_list.py +8 -4
  15. torchx/cli/cmd_log.py +6 -24
  16. torchx/cli/cmd_run.py +269 -45
  17. torchx/cli/cmd_runopts.py +2 -0
  18. torchx/cli/cmd_status.py +12 -1
  19. torchx/cli/cmd_tracker.py +3 -1
  20. torchx/cli/colors.py +2 -0
  21. torchx/cli/main.py +4 -0
  22. torchx/components/__init__.py +3 -8
  23. torchx/components/component_test_base.py +2 -0
  24. torchx/components/dist.py +18 -7
  25. torchx/components/integration_tests/component_provider.py +4 -2
  26. torchx/components/integration_tests/integ_tests.py +2 -0
  27. torchx/components/serve.py +2 -0
  28. torchx/components/structured_arg.py +7 -6
  29. torchx/components/utils.py +15 -4
  30. torchx/distributed/__init__.py +2 -4
  31. torchx/examples/apps/datapreproc/datapreproc.py +2 -0
  32. torchx/examples/apps/lightning/data.py +5 -3
  33. torchx/examples/apps/lightning/model.py +7 -6
  34. torchx/examples/apps/lightning/profiler.py +7 -4
  35. torchx/examples/apps/lightning/train.py +11 -2
  36. torchx/examples/torchx_out_of_sync_training.py +11 -0
  37. torchx/notebook.py +2 -0
  38. torchx/runner/__init__.py +2 -0
  39. torchx/runner/api.py +167 -60
  40. torchx/runner/config.py +43 -10
  41. torchx/runner/events/__init__.py +57 -13
  42. torchx/runner/events/api.py +14 -3
  43. torchx/runner/events/handlers.py +2 -0
  44. torchx/runtime/tracking/__init__.py +2 -0
  45. torchx/runtime/tracking/api.py +2 -0
  46. torchx/schedulers/__init__.py +16 -15
  47. torchx/schedulers/api.py +70 -14
  48. torchx/schedulers/aws_batch_scheduler.py +79 -5
  49. torchx/schedulers/aws_sagemaker_scheduler.py +598 -0
  50. torchx/schedulers/devices.py +17 -4
  51. torchx/schedulers/docker_scheduler.py +43 -11
  52. torchx/schedulers/ids.py +29 -23
  53. torchx/schedulers/kubernetes_mcad_scheduler.py +10 -8
  54. torchx/schedulers/kubernetes_scheduler.py +383 -38
  55. torchx/schedulers/local_scheduler.py +100 -27
  56. torchx/schedulers/lsf_scheduler.py +5 -4
  57. torchx/schedulers/slurm_scheduler.py +336 -20
  58. torchx/schedulers/streams.py +2 -0
  59. torchx/specs/__init__.py +89 -12
  60. torchx/specs/api.py +431 -32
  61. torchx/specs/builders.py +176 -38
  62. torchx/specs/file_linter.py +143 -57
  63. torchx/specs/finder.py +68 -28
  64. torchx/specs/named_resources_aws.py +254 -22
  65. torchx/specs/named_resources_generic.py +2 -0
  66. torchx/specs/overlays.py +106 -0
  67. torchx/specs/test/components/__init__.py +2 -0
  68. torchx/specs/test/components/a/__init__.py +2 -0
  69. torchx/specs/test/components/a/b/__init__.py +2 -0
  70. torchx/specs/test/components/a/b/c.py +2 -0
  71. torchx/specs/test/components/c/__init__.py +2 -0
  72. torchx/specs/test/components/c/d.py +2 -0
  73. torchx/tracker/__init__.py +12 -6
  74. torchx/tracker/api.py +15 -18
  75. torchx/tracker/backend/fsspec.py +2 -0
  76. torchx/util/cuda.py +2 -0
  77. torchx/util/datetime.py +2 -0
  78. torchx/util/entrypoints.py +39 -15
  79. torchx/util/io.py +2 -0
  80. torchx/util/log_tee_helpers.py +210 -0
  81. torchx/util/modules.py +65 -0
  82. torchx/util/session.py +42 -0
  83. torchx/util/shlex.py +2 -0
  84. torchx/util/strings.py +3 -1
  85. torchx/util/types.py +90 -29
  86. torchx/version.py +4 -2
  87. torchx/workspace/__init__.py +2 -0
  88. torchx/workspace/api.py +136 -6
  89. torchx/workspace/dir_workspace.py +2 -0
  90. torchx/workspace/docker_workspace.py +30 -2
  91. torchx_nightly-2025.12.24.dist-info/METADATA +167 -0
  92. torchx_nightly-2025.12.24.dist-info/RECORD +113 -0
  93. {torchx_nightly-2023.10.21.dist-info → torchx_nightly-2025.12.24.dist-info}/WHEEL +1 -1
  94. {torchx_nightly-2023.10.21.dist-info → torchx_nightly-2025.12.24.dist-info}/entry_points.txt +0 -1
  95. torchx/examples/pipelines/__init__.py +0 -0
  96. torchx/examples/pipelines/kfp/__init__.py +0 -0
  97. torchx/examples/pipelines/kfp/advanced_pipeline.py +0 -287
  98. torchx/examples/pipelines/kfp/dist_pipeline.py +0 -69
  99. torchx/examples/pipelines/kfp/intro_pipeline.py +0 -81
  100. torchx/pipelines/kfp/__init__.py +0 -28
  101. torchx/pipelines/kfp/adapter.py +0 -271
  102. torchx/pipelines/kfp/version.py +0 -17
  103. torchx/schedulers/gcp_batch_scheduler.py +0 -487
  104. torchx/schedulers/ray/ray_common.py +0 -22
  105. torchx/schedulers/ray/ray_driver.py +0 -307
  106. torchx/schedulers/ray_scheduler.py +0 -453
  107. torchx_nightly-2023.10.21.dist-info/METADATA +0 -174
  108. torchx_nightly-2023.10.21.dist-info/RECORD +0 -118
  109. {torchx_nightly-2023.10.21.dist-info → torchx_nightly-2025.12.24.dist-info/licenses}/LICENSE +0 -0
  110. {torchx_nightly-2023.10.21.dist-info → torchx_nightly-2025.12.24.dist-info}/top_level.txt +0 -0
torchx/__init__.py CHANGED
@@ -5,6 +5,8 @@
5
5
  # This source code is licensed under the BSD-style license found in the
6
6
  # LICENSE file in the root directory of this source tree.
7
7
 
8
+ # pyre-strict
9
+
8
10
  from .version import ( # noqa F401; noqa F401
9
11
  __version__ as __version__,
10
12
  TORCHX_IMAGE as IMAGE,
@@ -1,6 +1,8 @@
1
- #!/usr/bin/env python3
2
1
  # Copyright (c) Meta Platforms, Inc. and affiliates.
3
2
  # All rights reserved.
4
3
  #
5
4
  # This source code is licensed under the BSD-style license found in the
6
5
  # LICENSE file in the root directory of this source tree.
6
+
7
+ # pyre-strict
8
+ BASE_VERSION = "0.8.0dev0"
@@ -5,6 +5,8 @@
5
5
  # This source code is licensed under the BSD-style license found in the
6
6
  # LICENSE file in the root directory of this source tree.
7
7
 
8
+ # pyre-strict
9
+
8
10
  import argparse
9
11
  import binascii
10
12
  import os
@@ -5,6 +5,8 @@
5
5
  # This source code is licensed under the BSD-style license found in the
6
6
  # LICENSE file in the root directory of this source tree.
7
7
 
8
+ # pyre-strict
9
+
8
10
  import argparse
9
11
  import sys
10
12
  from typing import List
@@ -5,6 +5,8 @@
5
5
  # This source code is licensed under the BSD-style license found in the
6
6
  # LICENSE file in the root directory of this source tree.
7
7
 
8
+ # pyre-strict
9
+
8
10
  import argparse
9
11
  import os
10
12
  import shutil
@@ -5,6 +5,8 @@
5
5
  # This source code is licensed under the BSD-style license found in the
6
6
  # LICENSE file in the root directory of this source tree.
7
7
 
8
+ # pyre-strict
9
+
8
10
  import argparse
9
11
  import subprocess
10
12
  import sys
torchx/cli/__init__.py CHANGED
@@ -4,6 +4,8 @@
4
4
  # This source code is licensed under the BSD-style license found in the
5
5
  # LICENSE file in the root directory of this source tree.
6
6
 
7
+ # pyre-strict
8
+
7
9
  """
8
10
  The ``torchx`` CLI is a commandline tool around :py:class:`torchx.runner.Runner`.
9
11
  It allows users to launch :py:class:`torchx.specs.AppDef` directly onto
@@ -4,19 +4,27 @@
4
4
  # This source code is licensed under the BSD-style license found in the
5
5
  # LICENSE file in the root directory of this source tree.
6
6
 
7
+ # pyre-strict
8
+
9
+ import logging
10
+ import sys
7
11
  from argparse import Action, ArgumentParser, Namespace
8
- from typing import Any, Dict, Optional, Sequence, Text
12
+ from typing import Any, Dict, List, Optional, Sequence, Set, Text
9
13
 
10
14
  from torchx.runner import config
11
15
 
16
+ logger: logging.Logger = logging.getLogger(__name__)
17
+
12
18
 
13
- class _torchxconfig(Action):
19
+ class torchxconfig(Action):
14
20
  """
15
21
  Custom argparse action that loads default torchx CLI options
16
22
  from .torchxconfig file.
17
23
 
18
24
  """
19
25
 
26
+ called_args: Set[str] = set()
27
+
20
28
  # since this action is used for each argparse argument
21
29
  # load the config section for the subcmd once
22
30
  _subcmd_configs: Dict[str, Dict[str, str]] = {}
@@ -64,13 +72,18 @@ class _torchxconfig(Action):
64
72
  values: Any, # pyre-ignore[2] declared as Any in superclass Action
65
73
  option_string: Optional[str] = None,
66
74
  ) -> None:
75
+ if option_string is not None:
76
+ if option_string in self.called_args:
77
+ logger.error(f"{option_string} is specified more than once")
78
+ sys.exit(1)
79
+ self.called_args.add(option_string)
67
80
  setattr(namespace, self.dest, values)
68
81
 
69
82
 
70
83
  # argparse takes the action as a Type[Action] so we can't have custom constructors
71
84
  # hence for each subcommand we need to subclass the base _torchxconfig Action
72
85
  # this is also how store_true and store_false builtin actions are implemented in argparse
73
- class torchxconfig_run(_torchxconfig):
86
+ class torchxconfig_run(torchxconfig):
74
87
  """
75
88
  Custom action that gets the default argument from .torchxconfig.
76
89
  """
@@ -92,3 +105,25 @@ class torchxconfig_run(_torchxconfig):
92
105
  option_strings=option_strings,
93
106
  **kwargs,
94
107
  )
108
+
109
+
110
+ class ArgOnceAction(Action):
111
+ """
112
+ Custom argparse action only allows argument to be specified once
113
+ """
114
+
115
+ called_args: Set[str] = set()
116
+
117
+ def __call__(
118
+ self,
119
+ parser: ArgumentParser,
120
+ namespace: Namespace,
121
+ values: List[str],
122
+ option_string: Optional[str] = None,
123
+ ) -> None:
124
+ if option_string is not None:
125
+ if option_string in self.called_args:
126
+ logger.error(f"{option_string} is specified more than once")
127
+ sys.exit(1)
128
+ self.called_args.add(option_string)
129
+ setattr(namespace, self.dest, values)
torchx/cli/cmd_base.py CHANGED
@@ -4,6 +4,8 @@
4
4
  # This source code is licensed under the BSD-style license found in the
5
5
  # LICENSE file in the root directory of this source tree.
6
6
 
7
+ # pyre-strict
8
+
7
9
  import abc
8
10
  import argparse
9
11
 
torchx/cli/cmd_cancel.py CHANGED
@@ -5,6 +5,8 @@
5
5
  # This source code is licensed under the BSD-style license found in the
6
6
  # LICENSE file in the root directory of this source tree.
7
7
 
8
+ # pyre-strict
9
+
8
10
  import argparse
9
11
  import logging
10
12
 
@@ -5,6 +5,8 @@
5
5
  # This source code is licensed under the BSD-style license found in the
6
6
  # LICENSE file in the root directory of this source tree.
7
7
 
8
+ # pyre-strict
9
+
8
10
  import argparse
9
11
  import logging
10
12
  import sys
@@ -0,0 +1,30 @@
1
+ #!/usr/bin/env python3
2
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ # All rights reserved.
4
+ #
5
+ # This source code is licensed under the BSD-style license found in the
6
+ # LICENSE file in the root directory of this source tree.
7
+
8
+ # pyre-strict
9
+
10
+ import argparse
11
+ import logging
12
+
13
+ from torchx.cli.cmd_base import SubCommand
14
+ from torchx.runner import get_runner
15
+
16
+ logger: logging.Logger = logging.getLogger(__name__)
17
+
18
+
19
+ class CmdDelete(SubCommand):
20
+ def add_arguments(self, subparser: argparse.ArgumentParser) -> None:
21
+ subparser.add_argument(
22
+ "app_handle",
23
+ type=str,
24
+ help="torchx app handle (e.g. local://session-name/app-id)",
25
+ )
26
+
27
+ def run(self, args: argparse.Namespace) -> None:
28
+ app_handle = args.app_handle
29
+ runner = get_runner()
30
+ runner.delete(app_handle)
@@ -5,6 +5,8 @@
5
5
  # This source code is licensed under the BSD-style license found in the
6
6
  # LICENSE file in the root directory of this source tree.
7
7
 
8
+ # pyre-strict
9
+
8
10
  import argparse
9
11
  import dataclasses
10
12
  import logging
torchx/cli/cmd_list.py CHANGED
@@ -5,6 +5,8 @@
5
5
  # This source code is licensed under the BSD-style license found in the
6
6
  # LICENSE file in the root directory of this source tree.
7
7
 
8
+ # pyre-strict
9
+
8
10
  import argparse
9
11
  import logging
10
12
 
@@ -19,6 +21,7 @@ logger: logging.Logger = logging.getLogger(__name__)
19
21
 
20
22
  HANDLE_HEADER = "APP HANDLE"
21
23
  STATUS_HEADER = "APP STATUS"
24
+ NAME_HEADER = "APP NAME"
22
25
 
23
26
 
24
27
  class CmdList(SubCommand):
@@ -30,12 +33,13 @@ class CmdList(SubCommand):
30
33
  type=str,
31
34
  default=get_default_scheduler_name(),
32
35
  choices=list(scheduler_names),
33
- help=f"Name of the scheduler to use. One of: [{','.join(scheduler_names)}]."
34
- " For listing app handles for ray scheduler, RAY_ADDRESS env variable should be set.",
36
+ help=f"Name of the scheduler to use. One of: [{','.join(scheduler_names)}].",
35
37
  )
36
38
 
37
39
  def run(self, args: argparse.Namespace) -> None:
38
40
  with get_runner() as runner:
39
41
  apps = runner.list(args.scheduler)
40
- apps_data = [[app.app_handle, str(app.state)] for app in apps]
41
- print(tabulate(apps_data, headers=[HANDLE_HEADER, STATUS_HEADER]))
42
+ apps_data = [[app.app_handle, app.name, str(app.state)] for app in apps]
43
+ print(
44
+ tabulate(apps_data, headers=[HANDLE_HEADER, NAME_HEADER, STATUS_HEADER])
45
+ )
torchx/cli/cmd_log.py CHANGED
@@ -5,6 +5,8 @@
5
5
  # This source code is licensed under the BSD-style license found in the
6
6
  # LICENSE file in the root directory of this source tree.
7
7
 
8
+ # pyre-strict
9
+
8
10
  import argparse
9
11
  import logging
10
12
  import re
@@ -21,6 +23,10 @@ from torchx.runner import get_runner, Runner
21
23
  from torchx.schedulers.api import Stream
22
24
  from torchx.specs.api import is_started
23
25
  from torchx.specs.builders import make_app_handle
26
+ from torchx.util.log_tee_helpers import (
27
+ _find_role_replicas as find_role_replicas,
28
+ _prefix_line,
29
+ )
24
30
 
25
31
  from torchx.util.types import none_throws
26
32
 
@@ -37,19 +43,6 @@ def validate(job_identifier: str) -> None:
37
43
  sys.exit(1)
38
44
 
39
45
 
40
- def _prefix_line(prefix: str, line: str) -> str:
41
- """
42
- _prefix_line ensure the prefix is still present even when dealing with return characters
43
- """
44
- if "\r" in line:
45
- line = line.replace("\r", f"\r{prefix}")
46
- if "\n" in line[:-1]:
47
- line = line[:-1].replace("\n", f"\n{prefix}") + line[-1:]
48
- if not line.startswith("\r"):
49
- line = f"{prefix}{line}"
50
- return line
51
-
52
-
53
46
  def print_log_lines(
54
47
  file: TextIO,
55
48
  runner: Runner,
@@ -165,17 +158,6 @@ def get_logs(
165
158
  raise threads_exceptions[0]
166
159
 
167
160
 
168
- def find_role_replicas(
169
- app: specs.AppDef, role_name: Optional[str]
170
- ) -> List[Tuple[str, int]]:
171
- role_replicas = []
172
- for role in app.roles:
173
- if role_name is None or role_name == role.name:
174
- for i in range(role.num_replicas):
175
- role_replicas.append((role.name, i))
176
- return role_replicas
177
-
178
-
179
161
  class CmdLog(SubCommand):
180
162
  def add_arguments(self, subparser: argparse.ArgumentParser) -> None:
181
163
  subparser.add_argument(