torchax 0.0.6__py3-none-any.whl → 0.0.10.dev20251116__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

@@ -1,3 +1,17 @@
+ # Copyright 2025 Google LLC
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
  """
  Forked at: https://raw.githubusercontent.com/mlperf/training_results_v0.7/refs/heads/master/Google/benchmarks/ssd/implementations/ssd-research-JAX-tpu-v3-4096/nms.py
  """
torchax/ops/mappings.py CHANGED
@@ -1,3 +1,17 @@
+ # Copyright 2025 Google LLC
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
  from jax import dlpack as jaxdl
  import jax.numpy as jnp
  import numpy
@@ -6,6 +20,14 @@ import torch.func
  import torch.utils.dlpack as torchdl
  import torch.utils._mode_utils as mode_utils

+ NUMPY_UNSUPPORTED_DTYPES = {
+ torch.bfloat16: jnp.bfloat16,
+ torch.float8_e4m3fn: jnp.float8_e4m3fn,
+ torch.float8_e4m3fnuz: jnp.float8_e4m3fnuz,
+ torch.float8_e5m2: jnp.float8_e5m2,
+ torch.float8_e5m2fnuz: jnp.float8_e5m2fnuz,
+ }
+

  def t2j(t, use_dlpack=True):
  is_bool = False
@@ -28,14 +50,14 @@ def t2j(t, use_dlpack=True):
  if res is None:
  # https://github.com/google/jax/issues/7657
  # https://github.com/google/jax/issues/17784
- if t.dtype == torch.bfloat16:
+ if t.dtype in NUMPY_UNSUPPORTED_DTYPES:
  nparray = (t.cpu().detach().to(torch.float32).numpy()
- ) # numpy don't support bfloat16
+ ) # handle dtypes not supported by numpy
  else:
  nparray = t.cpu().detach().numpy()
  res = jnp.asarray(nparray)
- if t.dtype == torch.bfloat16:
- res = res.astype(jnp.bfloat16)
+ if t.dtype in NUMPY_UNSUPPORTED_DTYPES:
+ res = res.astype(NUMPY_UNSUPPORTED_DTYPES[t.dtype])

  if is_bool:
  res = res.astype(jnp.bool_)
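
Note: a minimal usage sketch of the widened fallback above, assuming torchax is installed and that passing use_dlpack=False makes t2j take the numpy conversion branch. numpy has no bfloat16/float8 representation, so the tensor is staged through float32 and cast back to the matching jax dtype.

import torch
import jax.numpy as jnp
from torchax.ops import mappings

t = torch.randn(4, 4, dtype=torch.bfloat16)
arr = mappings.t2j(t, use_dlpack=False)  # skip dlpack so the numpy fallback runs
assert arr.dtype == jnp.bfloat16         # cast back via NUMPY_UNSUPPORTED_DTYPES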
torchax/ops/op_base.py CHANGED
@@ -1,3 +1,17 @@
+ # Copyright 2025 Google LLC
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
  import functools
  import jax
  import jax.numpy as jnp
@@ -1,3 +1,17 @@
+ # Copyright 2025 Google LLC
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
  import dataclasses
  import logging
  from torchax.types import JaxCallable, TorchCallable
torchax/tensor.py CHANGED
@@ -1,3 +1,17 @@
+ # Copyright 2025 Google LLC
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
  import threading
  import logging
  import sys
@@ -357,16 +371,26 @@ class Environment(contextlib.ContextDecorator):

  _prng_key = jax.random.key(torch.initial_seed() % (1 << 63))
  self._property = threading.local()
- self._property.content = [
- RuntimeProperty(
- mesh=_mesh, prng=_prng_key, autocast_dtype=autocast_dtype)
- ]
+ self._initial_content = RuntimeProperty(
+ mesh=_mesh, prng=_prng_key, autocast_dtype=autocast_dtype)

  @property
  def param(self):
+ if not hasattr(self._property, 'content'):
+ self._property.content = [
+ self._initial_content
+ ]
  return self._property.content[-1]

  def manual_seed(self, key):
+ if isinstance(key, torch.Tensor):
+ assert key.ndim == 0, 'manual seed can only take scalars'
+ assert not key.dtype.is_floating_point, 'manual seed can only be integers'
+
+ if isinstance(key, Tensor):
+ key = key._elem
+ else:
+ key = key.item()
  jax_key = jax.random.PRNGKey(key)
  new_prop = self.param.override(prng=jax_key)
  self._property.content.append(new_prop)
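
Note: a hedged sketch of what the reworked manual_seed accepts: plain ints as before, plus 0-dim integer tensors (floating-point or non-scalar tensors trip the added asserts). The per-thread RuntimeProperty stack is now created lazily on first access to param, so this also works from threads other than the one that built the Environment.

import torch
import torchax

env = torchax.default_env()
env.manual_seed(42)                  # plain Python int, unchanged behavior
env.manual_seed(torch.tensor(1234))  # 0-dim integer tensor is now accepted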
@@ -469,12 +493,12 @@ class Environment(contextlib.ContextDecorator):
  arr = self.t2j_copy(the_tensor)
  res = Tensor(arr, self, the_tensor.requires_grad)

- if new_dtype is not None and new_dtype != the_tensor.dtype:
- if isinstance(the_tensor, Tensor):
+ if new_dtype is not None and new_dtype != res.dtype:
+ if isinstance(res, Tensor):
  res = res.apply_jax(jnp.astype, mappings.t2j_dtype(new_dtype))
  else:
  with mode_utils.no_dispatch(), torch._C.DisableTorchFunction():
- return the_tensor.to(device=new_device, dtype=new_dtype)
+ return res.to(device=new_device, dtype=new_dtype)
  return res

  def get_and_rotate_prng_key(self,
@@ -634,14 +658,14 @@ class Environment(contextlib.ContextDecorator):

  def t2j_iso(self, torchtensors):
  """Convert torchax Tensor to jax array.
-
+
  This function will not copy, will just unwrap the inner jax array out.
  Note: iso is short for "isomorphic"
  """

  def to_jax(x):
  if self.config.allow_mixed_math_with_scalar_tensor and not isinstance(
- x, Tensor):
+ x, Tensor) and not isinstance(x, View):
  if x.squeeze().ndim == 0:
  return x.item()
  if isinstance(
@@ -667,7 +691,7 @@ class Environment(contextlib.ContextDecorator):

  def j2t_iso(self, jaxarray):
  """Convert jax array to torchax Tensor.
-
+
  This function will not copy, will just wrap the jax array with a torchax Tensor
  Note: iso is short for "isomorphic"
  """
@@ -676,7 +700,7 @@ class Environment(contextlib.ContextDecorator):

  def j2t_copy(self, args):
  """Convert torch.Tensor in cpu to a jax array
-
+
  This might involves copying the data (depending if dlpack is enabled)
  """
  return torch_pytree.tree_map_only(
@@ -686,7 +710,7 @@ class Environment(contextlib.ContextDecorator):

  def t2j_copy(self, args):
  """Convert jax array to torch.Tensor in cpu.
-
+
  This might involves copying the data (depending if dlpack is enabled)
  """
  return torch_pytree.tree_map_only(
@@ -694,13 +718,14 @@ class Environment(contextlib.ContextDecorator):
  lambda x: mappings.t2j(x, self.config.use_dlpack_for_data_conversion),
  args)

- def override_op_definition(self, op_to_override, op_impl):
+ def override_op_definition(self, op_to_override, op_impl, is_view_op=False):
  self._ops[op_to_override] = ops_registry.Operator(
  op_to_override,
  op_impl,
  is_jax_function=False,
  is_user_defined=True,
  needs_env=False,
+ is_view_op=is_view_op,
  )

  @contextlib.contextmanager
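
Note: a hedged sketch of the extended override_op_definition signature. The op chosen and the replacement implementation below are hypothetical; only the is_view_op keyword comes from this release, and the impl is a torch-level callable because the registration sets is_jax_function=False.

import torch
import torchax

env = torchax.default_env()

def my_select_impl(self, dim, index):
  # hypothetical torch-level replacement for aten::select
  return torch.narrow(self, dim, index, 1).squeeze(dim)

env.override_op_definition(torch.ops.aten.select, my_select_impl, is_view_op=True)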
torchax/train.py CHANGED
@@ -1,3 +1,17 @@
+ # Copyright 2025 Google LLC
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
  import collections
  import functools
  import torch
@@ -12,106 +26,107 @@ mark_sharding = torch_view(jax.lax.with_sharding_constraint)


  def make_train_step(model_fn, loss_fn, optax_optimizer, remat_policy=None):
- """Make a function that do one train step given model and loss.
-
- model_fn: a function representing the model's forward:
- i.e. has signature Callable[weights, buffers, args] -> result. Where,
- weights is a pytree of trainable parameters
- buffers is a pytree of non-trainable parameters / constants
- args is the input data loaded from the data set
- result is the return value of the model
- loss_fn: a function to compute loss.
- i.e. it has signature of Callable[result, label] -> loss
- where, result is what model_fn returned
- loss is loaded from the dataloader.
- optax_optimizer: the optimizer from optax library. for example, optax.adam
- remat_policy: One of jax.ad_checkpoint.checkpoint_policies, specifies how
- to do gradient checkpointing. If None, then it means checkpoint everything.
- """
- env = torchax.default_env()
-
- def loss(weights, buffers, args, label): # inputs are XLATensor
- with env, jax.named_scope('compute_loss'):
- res = model_fn(weights, buffers, args)
- l = loss_fn(res, label)
- return l
-
- loss = interop.gradient_checkpoint(loss, kwargs={'policy': remat_policy})
- grad_fn = interop.jax_value_and_grad(loss)
-
- def step(weights, buffers, opt_state, args, label): #inputs are array
- with jax.named_scope('compute_gradient'):
- loss, gradient = grad_fn(weights, buffers, args, label)
-
- with jax.named_scope("optimizer_updates"):
- updates, opt_state = interop.call_jax(optax_optimizer.update, gradient,
- opt_state, weights)
- weights = interop.call_jax(optax.apply_updates, weights, updates)
- return loss, weights, opt_state
-
- # TODO: apply jax.jit so the user don't have to.
- return step
+ """Make a function that do one train step given model and loss.
+
+ model_fn: a function representing the model's forward:
+ i.e. has signature Callable[weights, buffers, args] -> result. Where,
+ weights is a pytree of trainable parameters
+ buffers is a pytree of non-trainable parameters / constants
+ args is the input data loaded from the data set
+ result is the return value of the model
+ loss_fn: a function to compute loss.
+ i.e. it has signature of Callable[result, label] -> loss
+ where, result is what model_fn returned
+ loss is loaded from the dataloader.
+ optax_optimizer: the optimizer from optax library. for example, optax.adam
+ remat_policy: One of jax.ad_checkpoint.checkpoint_policies, specifies how
+ to do gradient checkpointing. If None, then it means checkpoint everything.
+ """
+ env = torchax.default_env()
+
+ def loss(weights, buffers, args, label): # inputs are XLATensor
+ with env, jax.named_scope("compute_loss"):
+ res = model_fn(weights, buffers, args)
+ l = loss_fn(res, label)
+ return l
+
+ # loss = interop.gradient_checkpoint(loss, kwargs={'policy': remat_policy})
+ grad_fn = interop.jax_value_and_grad(loss)
+
+ def step(weights, buffers, opt_state, args, label): # inputs are array
+ with jax.named_scope("compute_gradient"):
+ loss, gradient = grad_fn(weights, buffers, args, label)
+
+ with jax.named_scope("optimizer_updates"):
+ updates, opt_state = interop.call_jax(
+ optax_optimizer.update, gradient, opt_state, weights
+ )
+ weights = interop.call_jax(optax.apply_updates, weights, updates)
+ return loss, weights, opt_state
+
+ # TODO: apply jax.jit so the user don't have to.
+ return step


  class Container:
- pass
+ pass


  class ScannedModule(torch.nn.Module):
-
- def __init__(self, module_list, checkpoint_policy=None):
- super().__init__()
-
- self.c = None
- assert module_list
- self.c = Container()
- self.c.one_mod = module_list[0]
- self.checkpoint_policy = checkpoint_policy
-
- weights = self._stack_layer_weights(module_list)
- self.layer_weights_keys = list(self.c.one_mod.state_dict().keys())
- self.params = torch.nn.ParameterDict({
- self._param_name_new(k): v for k, v in weights.items()
- })
-
- def _stack_layer_weights(self, module_list):
- # Create weights such that, for every [n, m] weights
- # becomes [k, n, m] where k is number of layer
- # i.e. stacking layer weights together
- temp = collections.defaultdict(list)
- for m in module_list:
- for k, v in m.state_dict().items():
- temp[k].append(v)
- res = {k: torch.stack(v) for k, v in temp.items()}
- return res
-
- def _param_name_new(self, old):
- return '___'.join(old.split('.'))
-
- def _param_name_old(self, new):
- return '.'.join(new.split('___'))
-
- def forward(self, *args, **kwargs):
- assert not kwargs
- weights = {
- k: self.params[self._param_name_new(k)] for k in self.layer_weights_keys
- }
- scan = interop.torch_view(jax.lax.scan)
-
- def eval_one_layer(args, weight):
- # unpack args
- h, *rest = args
- newh = torch.func.functional_call(self.c.one_mod, weight, args)
- # next layer's input; and residual to be added to list
- return (newh, *rest), None
-
- _eval_one_layer = interop.gradient_checkpoint(
- eval_one_layer,
- kwargs={'policy': self.checkpoint_policy},
- )
- h, _ = scan(
- _eval_one_layer,
- args,
- weights,
- )
- return h[0]
+ def __init__(self, module_list, checkpoint_policy=None):
+ super().__init__()
+
+ self.c = None
+ assert module_list
+ self.c = Container()
+ self.c.one_mod = module_list[0]
+ self.checkpoint_policy = checkpoint_policy
+
+ weights = self._stack_layer_weights(module_list)
+ self.layer_weights_keys = list(self.c.one_mod.state_dict().keys())
+ self.params = torch.nn.ParameterDict(
+ {self._param_name_new(k): v for k, v in weights.items()}
+ )
+
+ def _stack_layer_weights(self, module_list):
+ # Create weights such that, for every [n, m] weights
+ # becomes [k, n, m] where k is number of layer
+ # i.e. stacking layer weights together
+ temp = collections.defaultdict(list)
+ for m in module_list:
+ for k, v in m.state_dict().items():
+ temp[k].append(v)
+ res = {k: torch.stack(v) for k, v in temp.items()}
+ return res
+
+ def _param_name_new(self, old):
+ return "___".join(old.split("."))
+
+ def _param_name_old(self, new):
+ return ".".join(new.split("___"))
+
+ def forward(self, *args, **kwargs):
+ assert not kwargs
+ weights = {
+ k: self.params[self._param_name_new(k)]
+ for k in self.layer_weights_keys
+ }
+ scan = interop.torch_view(jax.lax.scan)
+
+ def eval_one_layer(args, weight):
+ # unpack args
+ h, *rest = args
+ newh = torch.func.functional_call(self.c.one_mod, weight, args)
+ # next layer's input; and residual to be added to list
+ return (newh, *rest), None
+
+ _eval_one_layer = interop.gradient_checkpoint(
+ eval_one_layer,
+ kwargs={"policy": self.checkpoint_policy},
+ )
+ h, _ = scan(
+ _eval_one_layer,
+ args,
+ weights,
+ )
+ return h[0]
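
Note: a hedged end-to-end sketch of make_train_step; the model, loss, and optimizer below are illustrative stand-ins, only the function signatures come from the code above.

import optax
import torch
from torchax import train

def model_fn(weights, buffers, args):
  # hypothetical linear model; args is the tuple of inputs from the dataloader
  (x,) = args
  return x @ weights['w'] + weights['b']

def loss_fn(result, label):
  return ((result - label) ** 2).mean()

optimizer = optax.adam(1e-3)
step = train.make_train_step(model_fn, loss_fn, optimizer)
# step(weights, buffers, opt_state, args, label) -> (loss, new_weights, new_opt_state)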
torchax/types.py CHANGED
@@ -1,3 +1,17 @@
+ # Copyright 2025 Google LLC
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
  from typing import Callable, Any, Union, ParamSpec, TypeAlias
  import torch
  import jax
torchax/util.py CHANGED
@@ -1,3 +1,17 @@
+ # Copyright 2025 Google LLC
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
  from typing import Any, Callable


torchax/view.py CHANGED
@@ -1,3 +1,17 @@
+ # Copyright 2025 Google LLC
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
  import torch
  import torch.utils._pytree as torch_pytree
  import jax