torchx-nightly 2024.6.20__py3-none-any.whl → 2024.6.22__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Potentially problematic release.
This version of torchx-nightly might be problematic. Click here for more details.
- torchx/schedulers/devices.py +15 -4
- torchx/specs/named_resources_aws.py +7 -2
- {torchx_nightly-2024.6.20.dist-info → torchx_nightly-2024.6.22.dist-info}/METADATA +1 -1
- {torchx_nightly-2024.6.20.dist-info → torchx_nightly-2024.6.22.dist-info}/RECORD +8 -8
- {torchx_nightly-2024.6.20.dist-info → torchx_nightly-2024.6.22.dist-info}/LICENSE +0 -0
- {torchx_nightly-2024.6.20.dist-info → torchx_nightly-2024.6.22.dist-info}/WHEEL +0 -0
- {torchx_nightly-2024.6.20.dist-info → torchx_nightly-2024.6.22.dist-info}/entry_points.txt +0 -0
- {torchx_nightly-2024.6.20.dist-info → torchx_nightly-2024.6.22.dist-info}/top_level.txt +0 -0
torchx/schedulers/devices.py
CHANGED
|
@@ -7,25 +7,36 @@
|
|
|
7
7
|
|
|
8
8
|
# pyre-strict
|
|
9
9
|
import warnings
|
|
10
|
+
from functools import partial
|
|
10
11
|
from typing import Callable, Dict, List, Mapping
|
|
11
12
|
|
|
12
13
|
from torchx.specs.api import DeviceMount
|
|
14
|
+
from torchx.specs.named_resources_aws import EFA_DEVICE, NEURON_DEVICE
|
|
13
15
|
|
|
14
16
|
|
|
15
|
-
def
|
|
17
|
+
def to_devicemounts(num_devices: int, device_type: str) -> List[DeviceMount]:
|
|
16
18
|
device_mounts = []
|
|
17
19
|
for device_index in range(0, num_devices):
|
|
18
20
|
device_mounts.append(
|
|
19
21
|
DeviceMount(
|
|
20
|
-
src_path=
|
|
21
|
-
dst_path=
|
|
22
|
+
src_path=device_type + str(device_index),
|
|
23
|
+
dst_path=device_type + str(device_index),
|
|
22
24
|
)
|
|
23
25
|
)
|
|
24
26
|
return device_mounts
|
|
25
27
|
|
|
26
28
|
|
|
29
|
+
neuron_to_devicemounts: Callable[[int], List[DeviceMount]] = partial(
|
|
30
|
+
to_devicemounts, device_type="/dev/neuron"
|
|
31
|
+
)
|
|
32
|
+
efa_to_devicemounts: Callable[[int], List[DeviceMount]] = partial(
|
|
33
|
+
to_devicemounts, device_type="/dev/infiniband/uverbs"
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
|
|
27
37
|
DEVICES: Mapping[str, Callable[[int], List[DeviceMount]]] = {
|
|
28
|
-
|
|
38
|
+
EFA_DEVICE: efa_to_devicemounts,
|
|
39
|
+
NEURON_DEVICE: neuron_to_devicemounts,
|
|
29
40
|
}
|
|
30
41
|
|
|
31
42
|
|
|
@@ -37,6 +37,7 @@ from typing import Callable, Mapping
|
|
|
37
37
|
from torchx.specs.api import Resource
|
|
38
38
|
|
|
39
39
|
EFA_DEVICE = "vpc.amazonaws.com/efa"
|
|
40
|
+
NEURON_DEVICE = "aws.amazon.com/neurondevice"
|
|
40
41
|
|
|
41
42
|
# ecs and ec2 have memtax and currently AWS Batch uses hard memory limits
|
|
42
43
|
# so we have to account for mem tax when registering these resources for AWS
|
|
@@ -255,7 +256,11 @@ def aws_g5_48xlarge() -> Resource:
|
|
|
255
256
|
|
|
256
257
|
def aws_trn1_2xlarge() -> Resource:
|
|
257
258
|
return Resource(
|
|
258
|
-
cpu=8,
|
|
259
|
+
cpu=8,
|
|
260
|
+
gpu=0,
|
|
261
|
+
memMB=32 * GiB,
|
|
262
|
+
capabilities={K8S_ITYPE: "trn1.2xlarge"},
|
|
263
|
+
devices={NEURON_DEVICE: 1},
|
|
259
264
|
)
|
|
260
265
|
|
|
261
266
|
|
|
@@ -265,7 +270,7 @@ def aws_trn1_32xlarge() -> Resource:
|
|
|
265
270
|
gpu=0,
|
|
266
271
|
memMB=512 * GiB,
|
|
267
272
|
capabilities={K8S_ITYPE: "trn1.32xlarge"},
|
|
268
|
-
devices={EFA_DEVICE: 8},
|
|
273
|
+
devices={EFA_DEVICE: 8, NEURON_DEVICE: 16},
|
|
269
274
|
)
|
|
270
275
|
|
|
271
276
|
|
|
@@ -68,7 +68,7 @@ torchx/schedulers/__init__.py,sha256=M9SBZiNdE3KI_yc1-BiRtAetfTgtX07uKkuvGUeZQLU
|
|
|
68
68
|
torchx/schedulers/api.py,sha256=s2hI87uAWtU2SHMNBKjAqelzQU_GKp_BjcxdtjVVDDk,14155
|
|
69
69
|
torchx/schedulers/aws_batch_scheduler.py,sha256=7qxy3UFRq0F731-kTjEi6VABWKD60o0req6CBMsTohU,27975
|
|
70
70
|
torchx/schedulers/aws_sagemaker_scheduler.py,sha256=dPah3yaKFUVm-ZZrzFbyM_abP-LCTd-AcAjZ6t2iycU,20699
|
|
71
|
-
torchx/schedulers/devices.py,sha256=
|
|
71
|
+
torchx/schedulers/devices.py,sha256=RjVcu22ZRl_9OKtOtmA1A3vNXgu2qD6A9ST0L0Hsg4I,1734
|
|
72
72
|
torchx/schedulers/docker_scheduler.py,sha256=IrDlmeH-tg_f3krA04Y81nK9dmuYfEPbYOuCjSQkIHA,16541
|
|
73
73
|
torchx/schedulers/gcp_batch_scheduler.py,sha256=dlUfvjfMuQiRcSXQAdwxqdadwPhOf82L5u-ejRWtFgE,16226
|
|
74
74
|
torchx/schedulers/ids.py,sha256=3E-_vwVYC-8Tv8kjuY9-W7TbOe_-Laqd8a65uIN3hQY,1798
|
|
@@ -87,7 +87,7 @@ torchx/specs/api.py,sha256=Y5uT7a-qZ4pP4kSfS6yYP1jUzQWLPI4qLQeyuBv5uDQ,36085
|
|
|
87
87
|
torchx/specs/builders.py,sha256=QDcQrnCO4bdSaiP0216XbCgTsnLutO_1_FW5jDiEIWI,9939
|
|
88
88
|
torchx/specs/file_linter.py,sha256=IeiomB1BgHUlT-ZsvGxar3llY63NOupfLBrOrD_---A,11860
|
|
89
89
|
torchx/specs/finder.py,sha256=MnwxG_UC4a-3X2wQ37ANEQR6D1TvriCLyuVYBh_-wuI,16249
|
|
90
|
-
torchx/specs/named_resources_aws.py,sha256=
|
|
90
|
+
torchx/specs/named_resources_aws.py,sha256=NDzF9srT7hiS5NGwEJc_sbuwxXMtq8l3rVG0QnVqpJE,8114
|
|
91
91
|
torchx/specs/named_resources_generic.py,sha256=Sg4tAdqiiWDrDz2Lj_pnfsjzGIXKTou73wPseh6j55w,2646
|
|
92
92
|
torchx/specs/test/components/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
|
|
93
93
|
torchx/specs/test/components/a/__init__.py,sha256=kdxEgnI8QBSBiuTjaB4qDD7JX84hWowyPWU4B2Cqe9A,561
|
|
@@ -113,9 +113,9 @@ torchx/workspace/__init__.py,sha256=FqN8AN4VhR1C_SBY10MggQvNZmyanbbuPuE-JCjkyUY,
|
|
|
113
113
|
torchx/workspace/api.py,sha256=1heBmPgB-W5Zf9gwViM7NrqvHpZlVYeMN7jpY8Qkytc,5479
|
|
114
114
|
torchx/workspace/dir_workspace.py,sha256=npNW_IjUZm_yS5r-8hrRkH46ndDd9a_eApT64m1S1T4,2268
|
|
115
115
|
torchx/workspace/docker_workspace.py,sha256=PFu2KQNVC-0p2aKJ-W_BKA9ZOmXdCY2ABEkCExp3udQ,10269
|
|
116
|
-
torchx_nightly-2024.6.
|
|
117
|
-
torchx_nightly-2024.6.
|
|
118
|
-
torchx_nightly-2024.6.
|
|
119
|
-
torchx_nightly-2024.6.
|
|
120
|
-
torchx_nightly-2024.6.
|
|
121
|
-
torchx_nightly-2024.6.
|
|
116
|
+
torchx_nightly-2024.6.22.dist-info/LICENSE,sha256=WVHfXhFC0Ia8LTKt_nJVYobdqTJVg_4J3Crrfm2A8KQ,1721
|
|
117
|
+
torchx_nightly-2024.6.22.dist-info/METADATA,sha256=GZuQkS1dg11xhaILgpdZwbQE3N87zlOrLnlwFGkJoKc,6184
|
|
118
|
+
torchx_nightly-2024.6.22.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
119
|
+
torchx_nightly-2024.6.22.dist-info/entry_points.txt,sha256=3JYZFlX9aWzR-Gs_qsx1zq7mlqbFz6Mi9rQUULW8caI,170
|
|
120
|
+
torchx_nightly-2024.6.22.dist-info/top_level.txt,sha256=pxew3bc2gsiViS0zADs0jb6kC5v8o_Yy_85fhHj_J1A,7
|
|
121
|
+
torchx_nightly-2024.6.22.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|