torchft-nightly 2026.1.3__cp310-cp310-manylinux_2_24_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- torchft/__init__.py +34 -0
- torchft/_test/diloco_trainer.py +287 -0
- torchft/_test/managed_work_test.py +320 -0
- torchft/_test_utils.py +111 -0
- torchft/_torchft.cpython-310-x86_64-linux-gnu.so +0 -0
- torchft/_torchft.pyi +116 -0
- torchft/checkpointing/__init__.py +20 -0
- torchft/checkpointing/_rwlock.py +136 -0
- torchft/checkpointing/_serialization.py +39 -0
- torchft/checkpointing/http_transport.py +299 -0
- torchft/checkpointing/http_transport_bench.py +61 -0
- torchft/checkpointing/http_transport_test.py +146 -0
- torchft/checkpointing/pg_transport.py +306 -0
- torchft/checkpointing/pg_transport_bench.py +99 -0
- torchft/checkpointing/pg_transport_test.py +101 -0
- torchft/checkpointing/rwlock_test.py +58 -0
- torchft/checkpointing/transport.py +68 -0
- torchft/checkpointing/transport_test.py +161 -0
- torchft/collectives.py +415 -0
- torchft/collectives_test.py +212 -0
- torchft/coordination.py +39 -0
- torchft/coordination_test.py +29 -0
- torchft/data.py +77 -0
- torchft/data_test.py +39 -0
- torchft/ddp.py +105 -0
- torchft/ddp_test.py +68 -0
- torchft/diloco_regression_test.py +644 -0
- torchft/examples/slurm/README.md +34 -0
- torchft/examples/slurm/punisher.py +95 -0
- torchft/examples/slurm/runner.py +221 -0
- torchft/fsdp_test.py +102 -0
- torchft/futures.py +353 -0
- torchft/futures_test.py +140 -0
- torchft/http.py +13 -0
- torchft/lighthouse_test.py +163 -0
- torchft/local_sgd.py +796 -0
- torchft/local_sgd_integ_test.py +600 -0
- torchft/local_sgd_test.py +324 -0
- torchft/manager.py +1358 -0
- torchft/manager_integ_test.py +653 -0
- torchft/manager_test.py +911 -0
- torchft/multiprocessing.py +38 -0
- torchft/multiprocessing_dummy_context.py +135 -0
- torchft/multiprocessing_test.py +58 -0
- torchft/optim.py +63 -0
- torchft/optim_test.py +50 -0
- torchft/otel.py +134 -0
- torchft/parameter_server.py +195 -0
- torchft/parameter_server_test.py +47 -0
- torchft/process_group.py +2118 -0
- torchft/process_group_test.py +1028 -0
- torchft/quantization.py +686 -0
- torchft/quantization_test.py +131 -0
- torchft/torchx.py +89 -0
- torchft/utils.py +67 -0
- torchft/work.py +26 -0
- torchft_nightly-2026.1.3.dist-info/METADATA +308 -0
- torchft_nightly-2026.1.3.dist-info/RECORD +61 -0
- torchft_nightly-2026.1.3.dist-info/WHEEL +4 -0
- torchft_nightly-2026.1.3.dist-info/entry_points.txt +2 -0
- torchft_nightly-2026.1.3.dist-info/licenses/LICENSE +34 -0
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the BSD-style license found in the
|
|
5
|
+
# LICENSE file in the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from unittest import TestCase
|
|
8
|
+
from unittest.mock import MagicMock
|
|
9
|
+
|
|
10
|
+
import torch
|
|
11
|
+
|
|
12
|
+
from torchft.parameter_server import ParameterServer
|
|
13
|
+
from torchft.process_group import ProcessGroup, ProcessGroupGloo
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class MyParameterServer(ParameterServer):
    """Minimal ParameterServer subclass used to exercise one session round trip.

    For every session, ``forward`` takes part in a broadcast rooted at rank 1
    on a one-element tensor, adds 23 to that tensor in place, and then takes
    part in a second broadcast rooted at rank 0.
    """

    def __init__(self) -> None:
        # NOTE(review): port=0 presumably lets the OS choose a free port --
        # confirm against ParameterServer.__init__.
        super().__init__(port=0)

    @classmethod
    def new_process_group(cls) -> ProcessGroup:
        """Return a new ``ProcessGroupGloo`` instance."""
        return ProcessGroupGloo()

    def forward(self, session_id: str, pg: ProcessGroup) -> None:
        """Run the server side of a session on ``pg``.

        Args:
            session_id: identifier of the session; not used by this test server.
            pg: process group over which the two broadcasts are issued.
        """
        payload = torch.zeros(1)

        # First broadcast is rooted at rank 1 (the peer); wait for completion
        # before touching the tensor.
        incoming = pg.broadcast_one(payload, root=1)
        incoming.wait()

        # In-place update so the same tensor object is broadcast back.
        payload += 23

        # Second broadcast is rooted at rank 0.
        outgoing = pg.broadcast_one(payload, root=0)
        outgoing.wait()
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class TestParameterServer(TestCase):
    """Round-trip test for ``MyParameterServer`` over a single session."""

    def test_parameter_server(self) -> None:
        """Send 12 to the server and expect 12 + 23 to come back."""
        # Keep the server object referenced for the whole test.
        server = MyParameterServer()

        server_addr = server.address()
        client_pg = MyParameterServer.new_session(server_addr)

        sent_value = 12
        server_increment = 23

        tensor = torch.zeros(1)
        tensor += sent_value

        # send to server (0) from client (1)
        send_work = client_pg.broadcast_one(tensor, root=1)
        send_work.wait()

        # recv from server (0) to client (1)
        recv_work = client_pg.broadcast_one(tensor, root=0)
        recv_work.wait()

        self.assertEqual(tensor[0].item(), sent_value + server_increment)
|