torchx-nightly 2024.1.29__py3-none-any.whl → 2024.1.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of torchx-nightly might be problematic. Click here for more details.
- torchx/components/dist.py +3 -1
- torchx/schedulers/gcp_batch_scheduler.py +0 -5
- torchx/workspace/docker_workspace.py +6 -1
- {torchx_nightly-2024.1.29.dist-info → torchx_nightly-2024.1.31.dist-info}/METADATA +1 -1
- {torchx_nightly-2024.1.29.dist-info → torchx_nightly-2024.1.31.dist-info}/RECORD +9 -9
- {torchx_nightly-2024.1.29.dist-info → torchx_nightly-2024.1.31.dist-info}/LICENSE +0 -0
- {torchx_nightly-2024.1.29.dist-info → torchx_nightly-2024.1.31.dist-info}/WHEEL +0 -0
- {torchx_nightly-2024.1.29.dist-info → torchx_nightly-2024.1.31.dist-info}/entry_points.txt +0 -0
- {torchx_nightly-2024.1.29.dist-info → torchx_nightly-2024.1.31.dist-info}/top_level.txt +0 -0
torchx/components/dist.py
CHANGED
|
@@ -174,6 +174,7 @@ def ddp(
|
|
|
174
174
|
rdzv_backend: str = "c10d",
|
|
175
175
|
mounts: Optional[List[str]] = None,
|
|
176
176
|
debug: bool = False,
|
|
177
|
+
tee: int = 3,
|
|
177
178
|
) -> specs.AppDef:
|
|
178
179
|
"""
|
|
179
180
|
Distributed data parallel style application (one role, multi-replica).
|
|
@@ -208,6 +209,7 @@ def ddp(
|
|
|
208
209
|
mounts: mounts to mount into the worker environment/container (ex. type=<bind/volume>,src=/host,dst=/job[,readonly]).
|
|
209
210
|
See scheduler documentation for more info.
|
|
210
211
|
debug: whether to run with preset debug flags enabled
|
|
212
|
+
tee: tees the specified std stream(s) to console + file. 0: none, 1: stdout, 2: stderr, 3: both
|
|
211
213
|
"""
|
|
212
214
|
|
|
213
215
|
if (script is None) == (m is None):
|
|
@@ -262,7 +264,7 @@ def ddp(
|
|
|
262
264
|
"--nproc_per_node",
|
|
263
265
|
str(nproc_per_node),
|
|
264
266
|
"--tee",
|
|
265
|
-
"3",
|
|
267
|
+
str(tee),
|
|
266
268
|
"--role",
|
|
267
269
|
"",
|
|
268
270
|
]
|
|
torchx/schedulers/gcp_batch_scheduler.py
CHANGED
|
@@ -205,14 +205,12 @@ class GCPBatchScheduler(Scheduler[GCPBatchOpts]):
|
|
|
205
205
|
if cpu <= 0:
|
|
206
206
|
cpu = 1
|
|
207
207
|
MILLI = 1000
|
|
208
|
-
# pyre-fixme[8]: Attribute has type `Field`; used as `int`.
|
|
209
208
|
res.cpu_milli = cpu * MILLI
|
|
210
209
|
memMB = resource.memMB
|
|
211
210
|
if memMB < 0:
|
|
212
211
|
raise ValueError(
|
|
213
212
|
f"memMB should to be set to a positive value, got {memMB}"
|
|
214
213
|
)
|
|
215
|
-
# pyre-fixme[8]: Attribute has type `Field`; used as `int`.
|
|
216
214
|
res.memory_mib = memMB
|
|
217
215
|
|
|
218
216
|
# TODO support named resources
|
|
@@ -360,13 +358,11 @@ class GCPBatchScheduler(Scheduler[GCPBatchOpts]):
|
|
|
360
358
|
return None
|
|
361
359
|
|
|
362
360
|
gpu = 0
|
|
363
|
-
# pyre-fixme[16]: `Field` has no attribute `instances`.
|
|
364
361
|
if len(job.allocation_policy.instances) != 0:
|
|
365
362
|
gpu_type = job.allocation_policy.instances[0].policy.machine_type
|
|
366
363
|
gpu = GPU_TYPE_TO_COUNT[gpu_type]
|
|
367
364
|
|
|
368
365
|
roles = {}
|
|
369
|
-
# pyre-fixme[16]: `RepeatedField` has no attribute `__iter__`.
|
|
370
366
|
for tg in job.task_groups:
|
|
371
367
|
env = tg.task_spec.environment.variables
|
|
372
368
|
role = env["TORCHX_ROLE_NAME"]
|
|
@@ -390,7 +386,6 @@ class GCPBatchScheduler(Scheduler[GCPBatchOpts]):
|
|
|
390
386
|
# TODO map role/replica status
|
|
391
387
|
desc = DescribeAppResponse(
|
|
392
388
|
app_id=app_id,
|
|
393
|
-
# pyre-fixme[16]: `Field` has no attribute `state`.
|
|
394
389
|
state=JOB_STATE[job.status.state.name],
|
|
395
390
|
roles=list(roles.values()),
|
|
396
391
|
)
|
|
torchx/workspace/docker_workspace.py
CHANGED
|
@@ -105,6 +105,10 @@ class DockerWorkspaceMixin(WorkspaceMixin[Dict[str, Tuple[str, str]]]):
|
|
|
105
105
|
workspace: a fsspec path to a directory with contents to be overlaid
|
|
106
106
|
"""
|
|
107
107
|
|
|
108
|
+
old_imgs = [
|
|
109
|
+
image.id
|
|
110
|
+
for image in self._docker_client.images.list(name=cfg["image_repo"])
|
|
111
|
+
]
|
|
108
112
|
context = _build_context(role.image, workspace)
|
|
109
113
|
|
|
110
114
|
try:
|
|
@@ -129,7 +133,8 @@ class DockerWorkspaceMixin(WorkspaceMixin[Dict[str, Tuple[str, str]]]):
|
|
|
129
133
|
self.LABEL_VERSION: torchx.__version__,
|
|
130
134
|
},
|
|
131
135
|
)
|
|
132
|
-
role.image = image.id
|
|
136
|
+
if len(old_imgs) == 0 or role.image not in old_imgs:
|
|
137
|
+
role.image = image.id
|
|
133
138
|
finally:
|
|
134
139
|
context.close()
|
|
135
140
|
|
|
{torchx_nightly-2024.1.29.dist-info → torchx_nightly-2024.1.31.dist-info}/RECORD
CHANGED
|
@@ -24,7 +24,7 @@ torchx/cli/colors.py,sha256=bVN_jEDwLgvypnDMeCHKn0q0ZDDhQjBJnyVfZHAE6nc,553
|
|
|
24
24
|
torchx/cli/main.py,sha256=DJYikTWacADa4VoscqZGjZmMKWWK29tBl6-pGtnzsRE,3469
|
|
25
25
|
torchx/components/__init__.py,sha256=6-TQ4SY-Tn56os_1lOs_HMabOoE7gkkud_8e1BgvfJw,12106
|
|
26
26
|
torchx/components/component_test_base.py,sha256=eKOwBp5cRgiA4FgZd_FCvyJ-ppv2v3JN9AGXnaSK_Cw,4135
|
|
27
|
-
torchx/components/dist.py,sha256
|
|
27
|
+
torchx/components/dist.py,sha256=tBOL_DjUeBjNDUcF2wnjlUEzZck9fo1ojRkbQA5ERT8,14555
|
|
28
28
|
torchx/components/interpret.py,sha256=g8gkKdDJvsBfX1ZrpVT7n2bMEtmwRV_1AqDyAnnQ_aA,697
|
|
29
29
|
torchx/components/metrics.py,sha256=1gbp8BfzZWGa7PD1db5vRADlONzmae4qSBUUdCWayr0,2814
|
|
30
30
|
torchx/components/serve.py,sha256=9RlpwlU2KOC7sMOZBeYwUpJIKDCXrU8xNo1SH-AT3fc,2141
|
|
@@ -68,7 +68,7 @@ torchx/schedulers/api.py,sha256=XlYrD6ZjV71HotJxdVZxA_Zc8DuxhM4KKCnkibqZflU,1414
|
|
|
68
68
|
torchx/schedulers/aws_batch_scheduler.py,sha256=t6wGK1NUjhTAoxH3ie7lIMTl2cxwdixkcT3HxKsggDk,27517
|
|
69
69
|
torchx/schedulers/devices.py,sha256=PNbcpf8fEM18Ag1RgK9Q30zPBalEcPdsFWctdbLxuv8,1352
|
|
70
70
|
torchx/schedulers/docker_scheduler.py,sha256=I-kZN-dXoQyokLPe9ZKjfhkVX5lHx_C5jvLLc2JmXQQ,15456
|
|
71
|
-
torchx/schedulers/gcp_batch_scheduler.py,sha256=
|
|
71
|
+
torchx/schedulers/gcp_batch_scheduler.py,sha256=pgPnlx5tzogPw6FuOkoaxBY-rVjEh3mWFP-yX5izGZ8,16206
|
|
72
72
|
torchx/schedulers/ids.py,sha256=IGsJEbCYTdfKdU3MhKLQU6b7sWCJy5dlRV6JIL_9BlE,1783
|
|
73
73
|
torchx/schedulers/kubernetes_mcad_scheduler.py,sha256=xAt-on3K8HwS2kzWasn0zXd2q4IDQzo2N5A5Ehh9NII,42885
|
|
74
74
|
torchx/schedulers/kubernetes_scheduler.py,sha256=6NXYJwiYCXNeB3ubr8t4q_SuAa-vlYdiCAPXTB3f-zg,27068
|
|
@@ -110,10 +110,10 @@ torchx/util/types.py,sha256=6ASuDKGO91UU3DCSuWhPX_C03341tApLCQEByUz8xpY,7016
|
|
|
110
110
|
torchx/workspace/__init__.py,sha256=KbGEzJqqXaIxALm_EQO64aw-fE7MeDMFXcpU1mY650I,783
|
|
111
111
|
torchx/workspace/api.py,sha256=Ej6DR__mNWaVyZgoVNAAOloDy1kTD5X1jz7pRtoVf80,5464
|
|
112
112
|
torchx/workspace/dir_workspace.py,sha256=Fz-hKIx0KN8iJf2BsthNj0NvTkWlxP6WFsElPs_BaT0,2253
|
|
113
|
-
torchx/workspace/docker_workspace.py,sha256=
|
|
114
|
-
torchx_nightly-2024.1.
|
|
115
|
-
torchx_nightly-2024.1.
|
|
116
|
-
torchx_nightly-2024.1.
|
|
117
|
-
torchx_nightly-2024.1.
|
|
118
|
-
torchx_nightly-2024.1.
|
|
119
|
-
torchx_nightly-2024.1.
|
|
113
|
+
torchx/workspace/docker_workspace.py,sha256=cqvxHTtrVza0gkoImbzeRiroWkCjdBXRVgONvaulS0g,9410
|
|
114
|
+
torchx_nightly-2024.1.31.dist-info/LICENSE,sha256=WVHfXhFC0Ia8LTKt_nJVYobdqTJVg_4J3Crrfm2A8KQ,1721
|
|
115
|
+
torchx_nightly-2024.1.31.dist-info/METADATA,sha256=07GcG1r70hTFIjIOPpGN88EQFp3oQ7TIRwFvEl4lSkI,5611
|
|
116
|
+
torchx_nightly-2024.1.31.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
|
117
|
+
torchx_nightly-2024.1.31.dist-info/entry_points.txt,sha256=3JYZFlX9aWzR-Gs_qsx1zq7mlqbFz6Mi9rQUULW8caI,170
|
|
118
|
+
torchx_nightly-2024.1.31.dist-info/top_level.txt,sha256=pxew3bc2gsiViS0zADs0jb6kC5v8o_Yy_85fhHj_J1A,7
|
|
119
|
+
torchx_nightly-2024.1.31.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|