torchx-nightly 2023.10.21__py3-none-any.whl → 2025.12.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of torchx-nightly might be problematic.

Files changed (110)
  1. torchx/__init__.py +2 -0
  2. torchx/{schedulers/ray/__init__.py → _version.py} +3 -1
  3. torchx/apps/serve/serve.py +2 -0
  4. torchx/apps/utils/booth_main.py +2 -0
  5. torchx/apps/utils/copy_main.py +2 -0
  6. torchx/apps/utils/process_monitor.py +2 -0
  7. torchx/cli/__init__.py +2 -0
  8. torchx/cli/argparse_util.py +38 -3
  9. torchx/cli/cmd_base.py +2 -0
  10. torchx/cli/cmd_cancel.py +2 -0
  11. torchx/cli/cmd_configure.py +2 -0
  12. torchx/cli/cmd_delete.py +30 -0
  13. torchx/cli/cmd_describe.py +2 -0
  14. torchx/cli/cmd_list.py +8 -4
  15. torchx/cli/cmd_log.py +6 -24
  16. torchx/cli/cmd_run.py +269 -45
  17. torchx/cli/cmd_runopts.py +2 -0
  18. torchx/cli/cmd_status.py +12 -1
  19. torchx/cli/cmd_tracker.py +3 -1
  20. torchx/cli/colors.py +2 -0
  21. torchx/cli/main.py +4 -0
  22. torchx/components/__init__.py +3 -8
  23. torchx/components/component_test_base.py +2 -0
  24. torchx/components/dist.py +18 -7
  25. torchx/components/integration_tests/component_provider.py +4 -2
  26. torchx/components/integration_tests/integ_tests.py +2 -0
  27. torchx/components/serve.py +2 -0
  28. torchx/components/structured_arg.py +7 -6
  29. torchx/components/utils.py +15 -4
  30. torchx/distributed/__init__.py +2 -4
  31. torchx/examples/apps/datapreproc/datapreproc.py +2 -0
  32. torchx/examples/apps/lightning/data.py +5 -3
  33. torchx/examples/apps/lightning/model.py +7 -6
  34. torchx/examples/apps/lightning/profiler.py +7 -4
  35. torchx/examples/apps/lightning/train.py +11 -2
  36. torchx/examples/torchx_out_of_sync_training.py +11 -0
  37. torchx/notebook.py +2 -0
  38. torchx/runner/__init__.py +2 -0
  39. torchx/runner/api.py +167 -60
  40. torchx/runner/config.py +43 -10
  41. torchx/runner/events/__init__.py +57 -13
  42. torchx/runner/events/api.py +14 -3
  43. torchx/runner/events/handlers.py +2 -0
  44. torchx/runtime/tracking/__init__.py +2 -0
  45. torchx/runtime/tracking/api.py +2 -0
  46. torchx/schedulers/__init__.py +16 -15
  47. torchx/schedulers/api.py +70 -14
  48. torchx/schedulers/aws_batch_scheduler.py +79 -5
  49. torchx/schedulers/aws_sagemaker_scheduler.py +598 -0
  50. torchx/schedulers/devices.py +17 -4
  51. torchx/schedulers/docker_scheduler.py +43 -11
  52. torchx/schedulers/ids.py +29 -23
  53. torchx/schedulers/kubernetes_mcad_scheduler.py +10 -8
  54. torchx/schedulers/kubernetes_scheduler.py +383 -38
  55. torchx/schedulers/local_scheduler.py +100 -27
  56. torchx/schedulers/lsf_scheduler.py +5 -4
  57. torchx/schedulers/slurm_scheduler.py +336 -20
  58. torchx/schedulers/streams.py +2 -0
  59. torchx/specs/__init__.py +89 -12
  60. torchx/specs/api.py +431 -32
  61. torchx/specs/builders.py +176 -38
  62. torchx/specs/file_linter.py +143 -57
  63. torchx/specs/finder.py +68 -28
  64. torchx/specs/named_resources_aws.py +254 -22
  65. torchx/specs/named_resources_generic.py +2 -0
  66. torchx/specs/overlays.py +106 -0
  67. torchx/specs/test/components/__init__.py +2 -0
  68. torchx/specs/test/components/a/__init__.py +2 -0
  69. torchx/specs/test/components/a/b/__init__.py +2 -0
  70. torchx/specs/test/components/a/b/c.py +2 -0
  71. torchx/specs/test/components/c/__init__.py +2 -0
  72. torchx/specs/test/components/c/d.py +2 -0
  73. torchx/tracker/__init__.py +12 -6
  74. torchx/tracker/api.py +15 -18
  75. torchx/tracker/backend/fsspec.py +2 -0
  76. torchx/util/cuda.py +2 -0
  77. torchx/util/datetime.py +2 -0
  78. torchx/util/entrypoints.py +39 -15
  79. torchx/util/io.py +2 -0
  80. torchx/util/log_tee_helpers.py +210 -0
  81. torchx/util/modules.py +65 -0
  82. torchx/util/session.py +42 -0
  83. torchx/util/shlex.py +2 -0
  84. torchx/util/strings.py +3 -1
  85. torchx/util/types.py +90 -29
  86. torchx/version.py +4 -2
  87. torchx/workspace/__init__.py +2 -0
  88. torchx/workspace/api.py +136 -6
  89. torchx/workspace/dir_workspace.py +2 -0
  90. torchx/workspace/docker_workspace.py +30 -2
  91. torchx_nightly-2025.12.24.dist-info/METADATA +167 -0
  92. torchx_nightly-2025.12.24.dist-info/RECORD +113 -0
  93. {torchx_nightly-2023.10.21.dist-info → torchx_nightly-2025.12.24.dist-info}/WHEEL +1 -1
  94. {torchx_nightly-2023.10.21.dist-info → torchx_nightly-2025.12.24.dist-info}/entry_points.txt +0 -1
  95. torchx/examples/pipelines/__init__.py +0 -0
  96. torchx/examples/pipelines/kfp/__init__.py +0 -0
  97. torchx/examples/pipelines/kfp/advanced_pipeline.py +0 -287
  98. torchx/examples/pipelines/kfp/dist_pipeline.py +0 -69
  99. torchx/examples/pipelines/kfp/intro_pipeline.py +0 -81
  100. torchx/pipelines/kfp/__init__.py +0 -28
  101. torchx/pipelines/kfp/adapter.py +0 -271
  102. torchx/pipelines/kfp/version.py +0 -17
  103. torchx/schedulers/gcp_batch_scheduler.py +0 -487
  104. torchx/schedulers/ray/ray_common.py +0 -22
  105. torchx/schedulers/ray/ray_driver.py +0 -307
  106. torchx/schedulers/ray_scheduler.py +0 -453
  107. torchx_nightly-2023.10.21.dist-info/METADATA +0 -174
  108. torchx_nightly-2023.10.21.dist-info/RECORD +0 -118
  109. {torchx_nightly-2023.10.21.dist-info → torchx_nightly-2025.12.24.dist-info/licenses}/LICENSE +0 -0
  110. {torchx_nightly-2023.10.21.dist-info → torchx_nightly-2025.12.24.dist-info}/top_level.txt +0 -0
torchx/cli/cmd_run.py CHANGED
@@ -4,23 +4,29 @@
4
4
  # This source code is licensed under the BSD-style license found in the
5
5
  # LICENSE file in the root directory of this source tree.
6
6
 
7
+ # pyre-strict
8
+
7
9
  import argparse
10
+ import json
8
11
  import logging
9
12
  import os
10
13
  import sys
11
14
  import threading
12
- from dataclasses import asdict
15
+ from collections import Counter
16
+ from dataclasses import asdict, dataclass, field, fields, MISSING as DATACLASS_MISSING
17
+ from itertools import groupby
13
18
  from pathlib import Path
14
19
  from pprint import pformat
15
- from typing import Dict, List, Optional, Tuple
20
+ from typing import Any, Dict, List, Optional, Tuple
16
21
 
17
22
  import torchx.specs as specs
18
- from torchx.cli.argparse_util import torchxconfig_run
23
+ from torchx.cli.argparse_util import ArgOnceAction, torchxconfig_run
19
24
  from torchx.cli.cmd_base import SubCommand
20
25
  from torchx.cli.cmd_log import get_logs
21
26
  from torchx.runner import config, get_runner, Runner
22
27
  from torchx.runner.config import load_sections
23
28
  from torchx.schedulers import get_default_scheduler_name, get_scheduler_factories
29
+ from torchx.specs import CfgVal, Workspace
24
30
  from torchx.specs.finder import (
25
31
  _Component,
26
32
  ComponentNotFoundException,
@@ -28,6 +34,7 @@ from torchx.specs.finder import (
28
34
  get_builtin_source,
29
35
  get_components,
30
36
  )
37
+ from torchx.util.log_tee_helpers import tee_logs
31
38
  from torchx.util.types import none_throws
32
39
 
33
40
 
@@ -35,10 +42,81 @@ MISSING_COMPONENT_ERROR_MSG = (
35
42
  "missing component name, either provide it from the CLI or in .torchxconfig"
36
43
  )
37
44
 
45
+ LOCAL_SCHEDULER_WARNING_MSG = (
46
+ "`local` scheduler is deprecated and will be"
47
+ " removed in the near future,"
48
+ " please use other variants of the local scheduler"
49
+ " (e.g. `local_cwd`)"
50
+ )
38
51
 
39
52
  logger: logging.Logger = logging.getLogger(__name__)
40
53
 
41
54
 
55
+ @dataclass
56
+ class TorchXRunArgs:
57
+ component_name: str
58
+ scheduler: str
59
+ scheduler_args: Dict[str, Any]
60
+ scheduler_cfg: Dict[str, CfgVal] = field(default_factory=dict)
61
+ dryrun: bool = False
62
+ wait: bool = False
63
+ log: bool = False
64
+ workspace: str = ""
65
+ parent_run_id: Optional[str] = None
66
+ tee_logs: bool = False
67
+ component_args: Dict[str, Any] = field(default_factory=dict)
68
+ component_args_str: List[str] = field(default_factory=list)
69
+
70
+
71
+ def torchx_run_args_from_json(json_data: Dict[str, Any]) -> TorchXRunArgs:
72
+ all_fields = [f.name for f in fields(TorchXRunArgs)]
73
+ required_fields = {
74
+ f.name
75
+ for f in fields(TorchXRunArgs)
76
+ if f.default is DATACLASS_MISSING and f.default_factory is DATACLASS_MISSING
77
+ }
78
+ missing_fields = required_fields - json_data.keys()
79
+ if missing_fields:
80
+ raise ValueError(
81
+ f"The following required fields are missing: {', '.join(missing_fields)}"
82
+ )
83
+
84
+ # Fail if there are fields that aren't part of the run command
85
+ filtered_json_data = {k: v for k, v in json_data.items() if k in all_fields}
86
+ extra_fields = set(json_data.keys()) - set(all_fields)
87
+ if extra_fields:
88
+ raise ValueError(
89
+ f"The following fields are not part of the run command: {', '.join(extra_fields)}.",
90
+ "Please check your JSON and try launching again.",
91
+ )
92
+
93
+ torchx_args = TorchXRunArgs(**filtered_json_data)
94
+ if torchx_args.workspace == "":
95
+ torchx_args.workspace = f"{Path.cwd()}"
96
+ return torchx_args
97
+
98
+
99
+ def torchx_run_args_from_argparse(
100
+ args: argparse.Namespace,
101
+ component_name: str,
102
+ component_args: List[str],
103
+ scheduler_cfg: Dict[str, CfgVal],
104
+ ) -> TorchXRunArgs:
105
+ return TorchXRunArgs(
106
+ component_name=component_name,
107
+ scheduler=args.scheduler,
108
+ scheduler_args={},
109
+ scheduler_cfg=scheduler_cfg,
110
+ dryrun=args.dryrun,
111
+ wait=args.wait,
112
+ log=args.log,
113
+ workspace=args.workspace,
114
+ parent_run_id=args.parent_run_id,
115
+ tee_logs=args.tee_logs,
116
+ component_args_str=component_args,
117
+ )
118
+
119
+
42
120
  def _parse_component_name_and_args(
43
121
  component_name_and_args: List[str],
44
122
  subparser: argparse.ArgumentParser,
@@ -82,6 +160,20 @@ def _parse_component_name_and_args(
82
160
  component = args[0]
83
161
  component_args = args[1:]
84
162
 
163
+ # Error if there are repeated command line arguments each group of arguments,
164
+ # where the groups are separated by "--"
165
+ arg_groups = [list(g) for _, g in groupby(component_args, key=lambda x: x == "--")]
166
+ for arg_group in arg_groups:
167
+ all_options = [
168
+ x
169
+ for x in arg_group
170
+ if x.startswith("-") and x.strip() != "-" and x.strip() != "--"
171
+ ]
172
+ arg_count = Counter(all_options)
173
+ duplicates = [arg for arg, count in arg_count.items() if count > 1]
174
+ if len(duplicates) > 0:
175
+ subparser.error(f"Repeated Command Line Arguments: {duplicates}")
176
+
85
177
  if not component:
86
178
  subparser.error(MISSING_COMPONENT_ERROR_MSG)
87
179
 
@@ -114,6 +206,7 @@ class CmdBuiltins(SubCommand):
114
206
  class CmdRun(SubCommand):
115
207
  def __init__(self) -> None:
116
208
  self._subparser: Optional[argparse.ArgumentParser] = None
209
+ self._stdin_data_json: Optional[Dict[str, Any]] = None
117
210
 
118
211
  def add_arguments(self, subparser: argparse.ArgumentParser) -> None:
119
212
  scheduler_names = get_scheduler_factories().keys()
@@ -131,6 +224,7 @@ class CmdRun(SubCommand):
131
224
  "-cfg",
132
225
  "--scheduler_args",
133
226
  type=str,
227
+ action=ArgOnceAction,
134
228
  help="Arguments to pass to the scheduler (Ex:`cluster=foo,user=bar`)."
135
229
  " For a list of scheduler run options run: `torchx runopts`",
136
230
  )
@@ -156,46 +250,54 @@ class CmdRun(SubCommand):
156
250
  subparser.add_argument(
157
251
  "--workspace",
158
252
  "--buck-target",
159
- default=f"file://{Path.cwd()}",
253
+ default=f"{Path.cwd()}",
160
254
  action=torchxconfig_run,
161
255
  help="local workspace to build/patch (buck-target of main binary if using buck)",
162
256
  )
163
257
  subparser.add_argument(
164
258
  "--parent_run_id",
165
259
  type=str,
260
+ action=ArgOnceAction,
166
261
  help="optional parent run ID that this run belongs to."
167
262
  " It can be used to group runs for experiment tracking purposes",
168
263
  )
264
+ subparser.add_argument(
265
+ "--tee_logs",
266
+ action="store_true",
267
+ default=False,
268
+ help="Add additional prefix to log lines to indicate which replica is printing the log",
269
+ )
270
+ subparser.add_argument(
271
+ "--stdin",
272
+ action="store_true",
273
+ default=False,
274
+ help="Read JSON input from stdin to parse into torchx run args and run the component.",
275
+ )
169
276
  subparser.add_argument(
170
277
  "component_name_and_args",
171
278
  nargs=argparse.REMAINDER,
172
279
  )
173
280
 
174
- def _run(self, runner: Runner, args: argparse.Namespace) -> None:
281
+ def _run_inner(self, runner: Runner, args: TorchXRunArgs) -> None:
175
282
  if args.scheduler == "local":
176
- logger.warning(
177
- "`local` scheduler is deprecated and will be"
178
- " removed in the near future,"
179
- " please use other variants of the local scheduler"
180
- " (e.g. `local_cwd`)"
181
- )
182
-
183
- scheduler_opts = runner.scheduler_run_opts(args.scheduler)
184
- cfg = scheduler_opts.cfg_from_str(args.scheduler_args)
185
- config.apply(scheduler=args.scheduler, cfg=cfg)
283
+ logger.warning(LOCAL_SCHEDULER_WARNING_MSG)
186
284
 
187
- component, component_args = _parse_component_name_and_args(
188
- args.component_name_and_args,
189
- none_throws(self._subparser),
285
+ config.apply(scheduler=args.scheduler, cfg=args.scheduler_cfg)
286
+ component_args = (
287
+ args.component_args_str
288
+ if args.component_args_str != []
289
+ else args.component_args
190
290
  )
191
291
  try:
292
+ workspace = Workspace.from_str(args.workspace) if args.workspace else None
293
+
192
294
  if args.dryrun:
193
295
  dryrun_info = runner.dryrun_component(
194
- component,
296
+ args.component_name,
195
297
  component_args,
196
298
  args.scheduler,
197
- workspace=args.workspace,
198
- cfg=cfg,
299
+ workspace=workspace,
300
+ cfg=args.scheduler_cfg,
199
301
  parent_run_id=args.parent_run_id,
200
302
  )
201
303
  print(
@@ -206,40 +308,143 @@ class CmdRun(SubCommand):
206
308
  print("\n=== SCHEDULER REQUEST ===\n" f"{dryrun_info}")
207
309
  else:
208
310
  app_handle = runner.run_component(
209
- component,
311
+ args.component_name,
210
312
  component_args,
211
313
  args.scheduler,
212
314
  workspace=args.workspace,
213
- cfg=cfg,
315
+ cfg=args.scheduler_cfg,
214
316
  parent_run_id=args.parent_run_id,
215
317
  )
216
318
  # DO NOT delete this line. It is used by slurm tests to retrieve the app id
217
319
  print(app_handle)
218
320
 
219
321
  if args.scheduler.startswith("local"):
220
- self._wait_and_exit(runner, app_handle, log=True)
322
+ self._wait_and_exit(
323
+ runner, app_handle, log=True, tee_logs=args.tee_logs
324
+ )
221
325
  else:
222
326
  logger.info(f"Launched app: {app_handle}")
223
327
  app_status = runner.status(app_handle)
224
328
  if app_status:
225
329
  logger.info(app_status.format())
226
330
  if args.wait or args.log:
227
- self._wait_and_exit(runner, app_handle, log=args.log)
331
+ self._wait_and_exit(
332
+ runner, app_handle, log=args.log, tee_logs=args.tee_logs
333
+ )
228
334
 
229
335
  except (ComponentValidationException, ComponentNotFoundException) as e:
230
- error_msg = f"\nFailed to run component `{component}` got errors: \n {e}"
336
+ error_msg = (
337
+ f"\nFailed to run component `{args.component_name}` got errors: \n {e}"
338
+ )
231
339
  logger.error(error_msg)
232
340
  sys.exit(1)
233
341
  except specs.InvalidRunConfigException as e:
234
342
  error_msg = (
235
- f"Scheduler arg is incorrect or missing required option: `{e.cfg_key}`\n"
236
- f"Run `torchx runopts` to check configuration for `{args.scheduler}` scheduler\n"
237
- f"Use `-cfg` to specify run cfg as `key1=value1,key2=value2` pair\n"
238
- "of setup `.torchxconfig` file, see: https://pytorch.org/torchx/main/experimental/runner.config.html"
343
+ "Invalid scheduler configuration: %s\n"
344
+ "To configure scheduler options, either:\n"
345
+ " 1. Use the `-cfg` command-line argument, e.g., `-cfg key1=value1,key2=value2`\n"
346
+ " 2. Set up a `.torchxconfig` file. For more details, visit: https://meta-pytorch.org/torchx/main/runner.config.html\n"
347
+ "Run `torchx runopts %s` to check all available configuration options for the "
348
+ "`%s` scheduler."
239
349
  )
240
- logger.error(error_msg)
350
+ print(error_msg % (e, args.scheduler, args.scheduler), file=sys.stderr)
241
351
  sys.exit(1)
242
352
 
353
+ def _run_from_cli_args(self, runner: Runner, args: argparse.Namespace) -> None:
354
+ scheduler_opts = runner.scheduler_run_opts(args.scheduler)
355
+ cfg = scheduler_opts.cfg_from_str(args.scheduler_args)
356
+
357
+ component, component_args = _parse_component_name_and_args(
358
+ args.component_name_and_args,
359
+ none_throws(self._subparser),
360
+ )
361
+ torchx_run_args = torchx_run_args_from_argparse(
362
+ args, component, component_args, cfg
363
+ )
364
+ self._run_inner(runner, torchx_run_args)
365
+
366
+ def _run_from_stdin_args(self, runner: Runner, stdin_data: Dict[str, Any]) -> None:
367
+ torchx_run_args = torchx_run_args_from_json(stdin_data)
368
+ scheduler_opts = runner.scheduler_run_opts(torchx_run_args.scheduler)
369
+ cfg = scheduler_opts.cfg_from_json_repr(
370
+ json.dumps(torchx_run_args.scheduler_args)
371
+ )
372
+ torchx_run_args.scheduler_cfg = cfg
373
+ self._run_inner(runner, torchx_run_args)
374
+
375
+ def _get_torchx_stdin_args(
376
+ self, args: argparse.Namespace
377
+ ) -> Optional[Dict[str, Any]]:
378
+ if not args.stdin:
379
+ return None
380
+ if self._stdin_data_json is None:
381
+ self._stdin_data_json = self.torchx_json_from_stdin(args)
382
+ return self._stdin_data_json
383
+
384
+ def torchx_json_from_stdin(
385
+ self, args: Optional[argparse.Namespace] = None
386
+ ) -> Dict[str, Any]:
387
+ try:
388
+ stdin_data_json = json.load(sys.stdin)
389
+ if args and args.dryrun:
390
+ stdin_data_json["dryrun"] = True
391
+ if not isinstance(stdin_data_json, dict):
392
+ logger.error(
393
+ "Invalid JSON input for `torchx run` command. Expected a dictionary."
394
+ )
395
+ sys.exit(1)
396
+ return stdin_data_json
397
+ except (json.JSONDecodeError, EOFError):
398
+ logger.error(
399
+ "Unable to parse JSON input for `torchx run` command, please make sure it's a valid JSON input."
400
+ )
401
+ sys.exit(1)
402
+
403
+ def verify_no_extra_args(self, args: argparse.Namespace) -> None:
404
+ """
405
+ Verifies that only --stdin was provided when using stdin mode.
406
+ """
407
+ if not args.stdin:
408
+ return
409
+
410
+ subparser = none_throws(self._subparser)
411
+ conflicting_args = []
412
+
413
+ # Check each argument against its default value
414
+ for action in subparser._actions:
415
+ if action.dest == "stdin": # Skip stdin itself
416
+ continue
417
+ if action.dest == "help": # Skip help
418
+ continue
419
+ if action.dest == "dryrun": # Skip dryrun
420
+ continue
421
+
422
+ current_value = getattr(args, action.dest, None)
423
+ default_value = action.default
424
+
425
+ # For arguments that differ from default
426
+ if current_value != default_value:
427
+ # Handle special cases where non-default doesn't mean explicitly set
428
+ if action.dest == "component_name_and_args" and current_value == []:
429
+ continue # Empty list is still default
430
+ print(f"*********\n {default_value} = {current_value}")
431
+ conflicting_args.append(f"--{action.dest.replace('_', '-')}")
432
+
433
+ if conflicting_args:
434
+ subparser.error(
435
+ f"Cannot specify {', '.join(conflicting_args)} when using --stdin. "
436
+ "All configuration should be provided in JSON input."
437
+ )
438
+
439
+ def _run(self, runner: Runner, args: argparse.Namespace) -> None:
440
+ self.verify_no_extra_args(args)
441
+ if args.stdin:
442
+ stdin_data_json = self._get_torchx_stdin_args(args)
443
+ if stdin_data_json is not None:
444
+ self._run_from_stdin_args(runner, stdin_data_json)
445
+ else:
446
+ self._run_from_cli_args(runner, args)
447
+
243
448
  def run(self, args: argparse.Namespace) -> None:
244
449
  os.environ["TORCHX_CONTEXT_NAME"] = os.getenv("TORCHX_CONTEXT_NAME", "cli_run")
245
450
  component_defaults = load_sections(prefix="component")
@@ -247,10 +452,16 @@ class CmdRun(SubCommand):
247
452
  with get_runner(component_defaults=component_defaults) as runner:
248
453
  self._run(runner, args)
249
454
 
250
- def _wait_and_exit(self, runner: Runner, app_handle: str, log: bool) -> None:
455
+ def _wait_and_exit(
456
+ self, runner: Runner, app_handle: str, log: bool, tee_logs: bool = False
457
+ ) -> None:
251
458
  logger.info("Waiting for the app to finish...")
252
459
 
253
- log_thread = self._start_log_thread(runner, app_handle) if log else None
460
+ log_thread = (
461
+ self._start_log_thread(runner, app_handle, tee_logs_enabled=tee_logs)
462
+ if log
463
+ else None
464
+ )
254
465
 
255
466
  status = runner.wait(app_handle, wait_interval=1)
256
467
  if not status:
@@ -267,17 +478,30 @@ class CmdRun(SubCommand):
267
478
  else:
268
479
  logger.debug(status)
269
480
 
270
- def _start_log_thread(self, runner: Runner, app_handle: str) -> threading.Thread:
271
- thread = threading.Thread(
272
- target=get_logs,
273
- kwargs={
274
- "file": sys.stderr,
275
- "runner": runner,
276
- "identifier": app_handle,
277
- "regex": None,
278
- "should_tail": True,
279
- },
280
- )
281
- thread.daemon = True
481
+ def _start_log_thread(
482
+ self, runner: Runner, app_handle: str, tee_logs_enabled: bool = False
483
+ ) -> threading.Thread:
484
+ if tee_logs_enabled:
485
+ thread = tee_logs(
486
+ dst=sys.stderr,
487
+ app_handle=app_handle,
488
+ regex=None,
489
+ runner=runner,
490
+ should_tail=True,
491
+ streams=None,
492
+ colorize=not sys.stderr.closed and sys.stderr.isatty(),
493
+ )
494
+ else:
495
+ thread = threading.Thread(
496
+ target=get_logs,
497
+ kwargs={
498
+ "file": sys.stderr,
499
+ "runner": runner,
500
+ "identifier": app_handle,
501
+ "regex": None,
502
+ "should_tail": True,
503
+ },
504
+ )
505
+ thread.daemon = True
282
506
  thread.start()
283
507
  return thread
torchx/cli/cmd_runopts.py CHANGED
@@ -5,6 +5,8 @@
5
5
  # This source code is licensed under the BSD-style license found in the
6
6
  # LICENSE file in the root directory of this source tree.
7
7
 
8
+ # pyre-strict
9
+
8
10
  import argparse
9
11
  import logging
10
12
 
torchx/cli/cmd_status.py CHANGED
@@ -5,7 +5,10 @@
5
5
  # This source code is licensed under the BSD-style license found in the
6
6
  # LICENSE file in the root directory of this source tree.
7
7
 
8
+ # pyre-strict
9
+
8
10
  import argparse
11
+ import json
9
12
  import logging
10
13
  import sys
11
14
  from typing import List, Optional
@@ -44,6 +47,11 @@ class CmdStatus(SubCommand):
44
47
  subparser.add_argument(
45
48
  "--roles", type=str, default="", help="comma separated roles to filter"
46
49
  )
50
+ subparser.add_argument(
51
+ "--json",
52
+ action="store_true",
53
+ help="output the status in JSON format",
54
+ )
47
55
 
48
56
  def run(self, args: argparse.Namespace) -> None:
49
57
  app_handle = args.app_handle
@@ -52,7 +60,10 @@ class CmdStatus(SubCommand):
52
60
  app_status = runner.status(app_handle)
53
61
  filter_roles = parse_list_arg(args.roles)
54
62
  if app_status:
55
- print(app_status.format(filter_roles))
63
+ if args.json:
64
+ print(json.dumps(app_status.to_json(filter_roles)))
65
+ else:
66
+ print(app_status.format(filter_roles))
56
67
  else:
57
68
  logger.error(
58
69
  f"AppDef: {app_id},"
torchx/cli/cmd_tracker.py CHANGED
@@ -4,6 +4,8 @@
4
4
  # This source code is licensed under the BSD-style license found in the
5
5
  # LICENSE file in the root directory of this source tree.
6
6
 
7
+ # pyre-strict
8
+
7
9
  import argparse
8
10
  import logging
9
11
 
@@ -43,7 +45,7 @@ class CmdTracker(SubCommand):
43
45
  else:
44
46
  raise RuntimeError(
45
47
  "No trackers configured."
46
- " See: https://pytorch.org/torchx/latest/runtime/tracking.html"
48
+ " See: https://meta-pytorch.org/torchx/latest/runtime/tracking.html"
47
49
  )
48
50
 
49
51
  def add_list_job_arguments(self, subparser: argparse.ArgumentParser) -> None:
torchx/cli/colors.py CHANGED
@@ -5,6 +5,8 @@
5
5
  # This source code is licensed under the BSD-style license found in the
6
6
  # LICENSE file in the root directory of this source tree.
7
7
 
8
+ # pyre-strict
9
+
8
10
  import sys
9
11
 
10
12
  # only print colors if outputting directly to a terminal
torchx/cli/main.py CHANGED
@@ -4,6 +4,8 @@
4
4
  # This source code is licensed under the BSD-style license found in the
5
5
  # LICENSE file in the root directory of this source tree.
6
6
 
7
+ # pyre-strict
8
+
7
9
  import logging
8
10
  import os
9
11
  import sys
@@ -14,6 +16,7 @@ import torchx
14
16
  from torchx.cli.cmd_base import SubCommand
15
17
  from torchx.cli.cmd_cancel import CmdCancel
16
18
  from torchx.cli.cmd_configure import CmdConfigure
19
+ from torchx.cli.cmd_delete import CmdDelete
17
20
  from torchx.cli.cmd_describe import CmdDescribe
18
21
  from torchx.cli.cmd_list import CmdList
19
22
  from torchx.cli.cmd_log import CmdLog
@@ -35,6 +38,7 @@ def get_default_sub_cmds() -> Dict[str, SubCommand]:
35
38
  "builtins": CmdBuiltins(),
36
39
  "cancel": CmdCancel(),
37
40
  "configure": CmdConfigure(),
41
+ "delete": CmdDelete(),
38
42
  "describe": CmdDescribe(),
39
43
  "list": CmdList(),
40
44
  "log": CmdLog(),
torchx/components/__init__.py CHANGED
@@ -4,6 +4,8 @@
4
4
  # This source code is licensed under the BSD-style license found in the
5
5
  # LICENSE file in the root directory of this source tree.
6
6
 
7
+ # pyre-strict
8
+
7
9
  """
8
10
  This module contains a collection of builtin TorchX components. The directory
9
11
  structure is organized by component category. Components are simply
@@ -179,7 +181,7 @@ To validate that you've defined your component correctly you can either:
179
181
 
180
182
  1. (easiest) Dryrun your component's ``--help`` with the cli: ``torchx run --dryrun ~/component.py:train --help``
181
183
  2. Use the component :ref:`linter<specs:Component Linter>`
182
- (see `dist_test.py <https://github.com/pytorch/torchx/blob/main/torchx/components/test/dist_test.py>`_ as an example)
184
+ (see `dist_test.py <https://github.com/meta-pytorch/torchx/blob/main/torchx/components/test/dist_test.py>`_ as an example)
183
185
 
184
186
 
185
187
  Running as a Job
@@ -296,13 +298,6 @@ imagine the component is defined as:
296
298
  * ``*args=["--help"]``: ``torchx run comp.py:f -- --help``
297
299
  * ``*args=["--i", "2"]``: ``torchx run comp.py:f --i 1 -- --i 2``
298
300
 
299
- Run in a Pipeline
300
- --------------------------------
301
-
302
- The :ref:`torchx.pipelines<pipelines:torchx.pipelines>` define adapters that
303
- convert a torchx component into the object that represents a pipeline "stage" in the
304
- target pipeline platform (see :ref:`Pipelines` for a list of supported pipeline orchestrators).
305
-
306
301
  Additional Resources
307
302
  -----------------------
308
303
 
torchx/components/component_test_base.py CHANGED
@@ -4,6 +4,8 @@
4
4
  # This source code is licensed under the BSD-style license found in the
5
5
  # LICENSE file in the root directory of this source tree.
6
6
 
7
+ # pyre-strict
8
+
7
9
  """
8
10
  You can unit test the component definitions as you would normal Python code
9
11
  since they are valid Python definitions.