torchax 0.0.10.dev20251116__py3-none-any.whl → 0.0.11.dev202612__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- torchax/__init__.py +73 -77
- torchax/amp.py +143 -271
- torchax/checkpoint.py +15 -9
- torchax/config.py +0 -4
- torchax/decompositions.py +66 -60
- torchax/export.py +53 -54
- torchax/flax.py +7 -5
- torchax/interop.py +66 -62
- torchax/mesh_util.py +20 -18
- torchax/ops/__init__.py +4 -3
- torchax/ops/jaten.py +3841 -3968
- torchax/ops/jax_reimplement.py +68 -42
- torchax/ops/jc10d.py +4 -6
- torchax/ops/jimage.py +20 -25
- torchax/ops/jlibrary.py +6 -6
- torchax/ops/jtorch.py +355 -419
- torchax/ops/jtorchvision_nms.py +69 -49
- torchax/ops/mappings.py +42 -63
- torchax/ops/op_base.py +17 -25
- torchax/ops/ops_registry.py +35 -30
- torchax/tensor.py +124 -128
- torchax/train.py +100 -102
- torchax/types.py +8 -7
- torchax/util.py +6 -4
- torchax/view.py +144 -136
- {torchax-0.0.10.dev20251116.dist-info → torchax-0.0.11.dev202612.dist-info}/METADATA +7 -1
- torchax-0.0.11.dev202612.dist-info/RECORD +31 -0
- {torchax-0.0.10.dev20251116.dist-info → torchax-0.0.11.dev202612.dist-info}/WHEEL +1 -1
- torchax-0.0.10.dev20251116.dist-info/RECORD +0 -31
- {torchax-0.0.10.dev20251116.dist-info → torchax-0.0.11.dev202612.dist-info}/licenses/LICENSE +0 -0
torchax/train.py
CHANGED
@@ -13,120 +13,118 @@
 # limitations under the License.

 import collections
-
-import torch
+
 import jax
+import optax
+import torch
+
 import torchax
 from torchax import interop
-from torchax.interop import torch_view
-import optax
+from torchax.interop import torch_view

 remat = torch_view(jax.remat)
 mark_sharding = torch_view(jax.lax.with_sharding_constraint)


 def make_train_step(model_fn, loss_fn, optax_optimizer, remat_policy=None):
-  … (old lines 29-68 removed; their content is not rendered in the source view)
+  """Make a function that do one train step given model and loss.
+
+  model_fn: a function representing the model's forward:
+    i.e. has signature Callable[weights, buffers, args] -> result. Where,
+    weights is a pytree of trainable parameters
+    buffers is a pytree of non-trainable parameters / constants
+    args is the input data loaded from the data set
+    result is the return value of the model
+  loss_fn: a function to compute loss.
+    i.e. it has signature of Callable[result, label] -> loss
+    where, result is what model_fn returned
+    loss is loaded from the dataloader.
+  optax_optimizer: the optimizer from optax library. for example, optax.adam
+  remat_policy: One of jax.ad_checkpoint.checkpoint_policies, specifies how
+    to do gradient checkpointing. If None, then it means checkpoint everything.
+  """
+  env = torchax.default_env()
+
+  def loss(weights, buffers, args, label):  # inputs are XLATensor
+    with env, jax.named_scope("compute_loss"):
+      res = model_fn(weights, buffers, args)
+      l = loss_fn(res, label)  # noqa: E741
+      return l
+
+  # loss = interop.gradient_checkpoint(loss, kwargs={'policy': remat_policy})
+  grad_fn = interop.jax_value_and_grad(loss)
+
+  def step(weights, buffers, opt_state, args, label):  # inputs are array
+    with jax.named_scope("compute_gradient"):
+      loss, gradient = grad_fn(weights, buffers, args, label)
+
+    with jax.named_scope("optimizer_updates"):
+      updates, opt_state = interop.call_jax(
+        optax_optimizer.update, gradient, opt_state, weights
+      )
+      weights = interop.call_jax(optax.apply_updates, weights, updates)
+    return loss, weights, opt_state
+
+  # TODO: apply jax.jit so the user don't have to.
+  return step


 class Container:
-
+  pass


 class ScannedModule(torch.nn.Module):
-  … (old lines 76-129 removed; their content is not rendered in the source view)
-      weights,
-    )
-    return h[0]
+  def __init__(self, module_list, checkpoint_policy=None):
+    super().__init__()
+
+    self.c = None
+    assert module_list
+    self.c = Container()
+    self.c.one_mod = module_list[0]
+    self.checkpoint_policy = checkpoint_policy
+
+    weights = self._stack_layer_weights(module_list)
+    self.layer_weights_keys = list(self.c.one_mod.state_dict().keys())
+    self.params = torch.nn.ParameterDict(
+      {self._param_name_new(k): v for k, v in weights.items()}
+    )
+
+  def _stack_layer_weights(self, module_list):
+    # Create weights such that, for every [n, m] weights
+    # becomes [k, n, m] where k is number of layer
+    # i.e. stacking layer weights together
+    temp = collections.defaultdict(list)
+    for m in module_list:
+      for k, v in m.state_dict().items():
+        temp[k].append(v)
+    res = {k: torch.stack(v) for k, v in temp.items()}
+    return res
+
+  def _param_name_new(self, old):
+    return "___".join(old.split("."))
+
+  def _param_name_old(self, new):
+    return ".".join(new.split("___"))
+
+  def forward(self, *args, **kwargs):
+    assert not kwargs
+    weights = {k: self.params[self._param_name_new(k)] for k in self.layer_weights_keys}
+    scan = interop.torch_view(jax.lax.scan)
+
+    def eval_one_layer(args, weight):
+      # unpack args
+      h, *rest = args
+      newh = torch.func.functional_call(self.c.one_mod, weight, args)
+      # next layer's input; and residual to be added to list
+      return (newh, *rest), None
+
+    _eval_one_layer = interop.gradient_checkpoint(
+      eval_one_layer,
+      kwargs={"policy": self.checkpoint_policy},
+    )
+    h, _ = scan(
+      _eval_one_layer,
+      args,
+      weights,
+    )
+    return h[0]
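For orientation, the rewritten make_train_step builds a loss over (weights, buffers, args, label), differentiates it with interop.jax_value_and_grad, and applies the optax update inside the returned step function, which is still not jitted (see the retained TODO). Below is a minimal usage sketch, not taken from the package: the toy model, loss, and synthetic batches are hypothetical placeholders, and it assumes step can be driven with torch-side tensors created inside the default env; only make_train_step, torchax.default_env, and interop.call_jax come from the diff above.

import optax
import torch
import torchax
from torchax import interop
from torchax.train import make_train_step

env = torchax.default_env()

with env:
  model = torch.nn.Linear(8, 2)  # hypothetical toy model
  weights = dict(model.named_parameters())
  buffers = dict(model.named_buffers())

  def model_fn(w, b, args):
    # Functional forward pass over the supplied parameter/buffer pytrees.
    return torch.func.functional_call(model, {**w, **b}, args)

  def loss_fn(result, label):
    return torch.nn.functional.cross_entropy(result, label)

  optimizer = optax.adam(1e-3)
  # The optax state is created on the jax side of the interop boundary.
  opt_state = interop.call_jax(optimizer.init, weights)

  step = make_train_step(model_fn, loss_fn, optimizer)
  batches = [(torch.randn(4, 8), torch.randint(0, 2, (4,))) for _ in range(3)]  # synthetic data
  for args, label in batches:
    loss, weights, opt_state = step(weights, buffers, opt_state, args, label)

Because step is left un-jitted, callers who want a compiled step still need to wrap it themselves, e.g. with jax.jit through whichever interop helper their torchax version provides.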
torchax/types.py
CHANGED
(Removed lines shown with a trailing … are truncated in the source view.)

@@ -12,15 +12,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from …
-import …
+from collections.abc import Callable
+from typing import Any, ParamSpec, TypeAlias, Union
+
 import jax
 import jax.numpy as jnp
-import …
+import torch

-P = ParamSpec(…
+P = ParamSpec("P")

-TorchValue: TypeAlias = Union[torch.Tensor, torch.dtype, …
+TorchValue: TypeAlias = Union[torch.Tensor, torch.dtype, "TorchCallable", Any]
 TorchCallable: TypeAlias = Callable[P, TorchValue]
-JaxValue: TypeAlias = Union[jax.Array, jnp.dtype, …
-JaxCallable: TypeAlias = Callable[P, JaxValue]
+JaxValue: TypeAlias = Union[jax.Array, jnp.dtype, "JaxCallable", Any]
+JaxCallable: TypeAlias = Callable[P, JaxValue]
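These aliases document the two sides of the interop boundary: a TorchCallable is any callable over torch-side values and a JaxCallable any callable over jax-side values, with interop.torch_view (used above in train.py to wrap jax.remat and jax.lax.scan) bridging from the jax side to the torch side. A tiny illustration; the as_torch helper is a hypothetical example, not a torchax API:

import jax.numpy as jnp

from torchax import interop
from torchax.types import JaxCallable, TorchCallable


def as_torch(fn: JaxCallable) -> TorchCallable:
  # torch_view wraps a jax-side callable so it can be invoked with
  # torch-side values, which is exactly what the aliases describe.
  return interop.torch_view(fn)


exp_t: TorchCallable = as_torch(jnp.exp)  # hypothetical usage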
torchax/util.py
CHANGED
(Removed lines shown with a trailing … are truncated in the source view.)

@@ -12,11 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from …
+from collections.abc import Callable
+from typing import Any


-def partition(…
-    …
+def partition(
+    original: list[Any], func: Callable[[Any], bool]
+) -> tuple[list[Any], list[Any]]:
   """Partitions elements into two parallel lists based on a predicate function.

   Iterates through the 'original' list, applying 'func' to each element 'a'.

@@ -97,6 +99,6 @@ def merge(list1: list[Any], list2: list[Any]) -> list[Any]:
   """
   assert len(list1) == len(list2)
   res = []
-  for a, b in zip(list1, list2):
+  for a, b in zip(list1, list2, strict=False):
     res.append(b if a is None else a)
   return res
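For orientation, partition and merge are used as a pair: partition splits a list into two position-aligned lists using a predicate (the placeholder convention is assumed here), and merge, as the hunk above shows, takes list2's entry wherever list1 holds None. The strict=False added to zip keeps zip's default truncation behavior and only makes that choice explicit for linters; the preceding assert already guarantees equal lengths. A small sketch with made-up values:

from torchax.util import merge, partition

# Split by a predicate; the two returned lists stay aligned by position
# (assumed: slots that do not match the predicate are filled with None).
evens, others = partition([1, 2, 3, 4], lambda x: x % 2 == 0)

# merge prefers list1's entry and falls back to list2 where list1 is None.
restored = merge(evens, others)
print(restored)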