torchax-0.0.10.dev20251118-py3-none-any.whl


torchax/train.py ADDED
@@ -0,0 +1,130 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import collections
+
+import jax
+import optax
+import torch
+
+import torchax
+from torchax import interop
+from torchax.interop import torch_view
+
+remat = torch_view(jax.remat)
+mark_sharding = torch_view(jax.lax.with_sharding_constraint)
+
+
+def make_train_step(model_fn, loss_fn, optax_optimizer, remat_policy=None):
+    """Make a function that does one train step given model and loss.
+
+    model_fn: a function representing the model's forward pass,
+        i.e. it has signature Callable[weights, buffers, args] -> result, where
+        weights is a pytree of trainable parameters,
+        buffers is a pytree of non-trainable parameters / constants,
+        args is the input data loaded from the data set, and
+        result is the return value of the model.
+    loss_fn: a function to compute loss,
+        i.e. it has signature Callable[result, label] -> loss, where
+        result is what model_fn returned and
+        label is loaded from the dataloader.
+    optax_optimizer: the optimizer from the optax library, for example optax.adam.
+    remat_policy: one of jax.ad_checkpoint.checkpoint_policies; specifies how
+        to do gradient checkpointing. If None, checkpoint everything.
+    """
+    env = torchax.default_env()
+
+    def loss(weights, buffers, args, label):  # inputs are XLATensor
+        with env, jax.named_scope("compute_loss"):
+            res = model_fn(weights, buffers, args)
+            l = loss_fn(res, label)  # noqa: E741
+            return l
+
+    # loss = interop.gradient_checkpoint(loss, kwargs={'policy': remat_policy})
+    grad_fn = interop.jax_value_and_grad(loss)
+
+    def step(weights, buffers, opt_state, args, label):  # inputs are array
+        with jax.named_scope("compute_gradient"):
+            loss, gradient = grad_fn(weights, buffers, args, label)
+
+        with jax.named_scope("optimizer_updates"):
+            updates, opt_state = interop.call_jax(
+                optax_optimizer.update, gradient, opt_state, weights
+            )
+            weights = interop.call_jax(optax.apply_updates, weights, updates)
+        return loss, weights, opt_state
+
+    # TODO: apply jax.jit so the user doesn't have to.
+    return step
+
+
+class Container:
+    pass
+
+
+class ScannedModule(torch.nn.Module):
+    def __init__(self, module_list, checkpoint_policy=None):
+        super().__init__()
+
+        self.c = None
+        assert module_list
+        self.c = Container()
+        self.c.one_mod = module_list[0]
+        self.checkpoint_policy = checkpoint_policy
+
+        weights = self._stack_layer_weights(module_list)
+        self.layer_weights_keys = list(self.c.one_mod.state_dict().keys())
+        self.params = torch.nn.ParameterDict(
+            {self._param_name_new(k): v for k, v in weights.items()}
+        )
+
+    def _stack_layer_weights(self, module_list):
+        # Create stacked weights: every [n, m] weight becomes [k, n, m],
+        # where k is the number of layers,
+        # i.e. stack the per-layer weights together.
+        temp = collections.defaultdict(list)
+        for m in module_list:
+            for k, v in m.state_dict().items():
+                temp[k].append(v)
+        res = {k: torch.stack(v) for k, v in temp.items()}
+        return res
+
+    def _param_name_new(self, old):
+        return "___".join(old.split("."))
+
+    def _param_name_old(self, new):
+        return ".".join(new.split("___"))
+
+    def forward(self, *args, **kwargs):
+        assert not kwargs
+        weights = {k: self.params[self._param_name_new(k)] for k in self.layer_weights_keys}
+        scan = interop.torch_view(jax.lax.scan)
+
+        def eval_one_layer(args, weight):
+            # unpack args
+            h, *rest = args
+            newh = torch.func.functional_call(self.c.one_mod, weight, args)
+            # next layer's input; and residual to be added to list
+            return (newh, *rest), None
+
+        _eval_one_layer = interop.gradient_checkpoint(
+            eval_one_layer,
+            kwargs={"policy": self.checkpoint_policy},
+        )
+        h, _ = scan(
+            _eval_one_layer,
+            args,
+            weights,
+        )
+        return h[0]
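
A minimal sketch of how make_train_step might be wired up. The model, loss, and optimizer below are illustrative only, and preparing the weights/buffers pytrees and the optimizer state (optimizer.init) is assumed to happen elsewhere; none of that glue is part of this diff.

    import optax
    import torch
    from torchax import train

    model = torch.nn.Linear(8, 2)

    def model_fn(weights, buffers, args):
        # Run the module functionally against the given parameter pytrees.
        return torch.func.functional_call(model, {**weights, **buffers}, args)

    def loss_fn(result, label):
        return torch.nn.functional.cross_entropy(result, label)

    step = train.make_train_step(model_fn, loss_fn, optax.adam(1e-3))
    # step(weights, buffers, opt_state, args, label) -> (loss, weights, opt_state),
    # where weights/buffers are pytrees of torchax tensors and opt_state comes
    # from the optimizer's init(); per the TODO above, the caller is expected to
    # wrap the returned step in jax.jit.
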
torchax/types.py ADDED
@@ -0,0 +1,27 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from collections.abc import Callable
+from typing import Any, ParamSpec, TypeAlias, Union
+
+import jax
+import jax.numpy as jnp
+import torch
+
+P = ParamSpec("P")
+
+TorchValue: TypeAlias = Union[torch.Tensor, torch.dtype, "TorchCallable", Any]
+TorchCallable: TypeAlias = Callable[P, TorchValue]
+JaxValue: TypeAlias = Union[jax.Array, jnp.dtype, "JaxCallable", Any]
+JaxCallable: TypeAlias = Callable[P, JaxValue]
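
These aliases are plain typing helpers. A hypothetical use is annotating a function that crosses the torch/JAX boundary (the helper below is not part of torchax):

    from torchax.types import JaxValue, TorchValue

    def to_jax_side(value: TorchValue) -> JaxValue:
        """Illustrative signature only: maps a torch-side value to a JAX-side one."""
        ...
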
torchax/util.py ADDED
@@ -0,0 +1,104 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from collections.abc import Callable
+from typing import Any
+
+
+def partition(
+    original: list[Any], func: Callable[[Any], bool]
+) -> tuple[list[Any], list[Any]]:
+    """Partitions elements into two parallel lists based on a predicate function.
+
+    Iterates through the 'original' list, applying 'func' to each element 'a'.
+    - If `func(a)` returns True, 'a' is appended to the first list ('truthy')
+      and `None` is appended to the second list ('falsy').
+    - If `func(a)` returns False, `None` is appended to the first list ('truthy')
+      and 'a' is appended to the second list ('falsy').
+
+    The result is two lists of the same length as the 'original' list, acting
+    as parallel representations of the partitioned elements, using `None` as
+    placeholders.
+
+    This is useful when we want to mark a group of elements as static (via passing
+    static_argnums) or donated (via donate_argnums) when combining with jax.jit
+    and friends.
+
+    Args:
+        original: The list of elements to partition.
+        func: A callable (function or lambda) that accepts an element from
+            'original' and returns a boolean value (True or False).
+
+    Returns:
+        A tuple containing two lists (`truthy`, `falsy`), both of the same
+        length as `original`:
+        - The first list contains elements `x` where `func(x)` was True, and
+          `None` otherwise.
+        - The second list contains elements `x` where `func(x)` was False, and
+          `None` otherwise.
+
+    Example:
+        >>> def is_even(n): return n % 2 == 0
+        >>> nums = [1, 2, 3, 4, 5, 6]
+        >>> truthy_list, falsy_list = partition(nums, is_even)
+        >>> truthy_list
+        [None, 2, None, 4, None, 6]
+        >>> falsy_list
+        [1, None, 3, None, 5, None]
+    """
+    truthy = []
+    falsy = []
+    for a in original:
+        t, f = (a, None) if func(a) else (None, a)
+        truthy.append(t)
+        falsy.append(f)
+    return truthy, falsy
+
+
+def merge(list1: list[Any], list2: list[Any]) -> list[Any]:
+    """Merges two lists element-wise, prioritizing non-None elements from list1.
+
+    Creates a new list where each element is taken from the corresponding position
+    in 'list1', unless that element is None, in which case the element from the
+    corresponding position in 'list2' is used. Assumes both lists have the
+    same length.
+
+    Invariant: merge(*partition(input_list, predicate)) == input_list for any predicate.
+
+    Args:
+        list1: The primary list. Its elements are preferred unless they are None.
+        list2: The secondary list. Its elements are used as fallbacks when the
+            corresponding element in list1 is None.
+
+    Returns:
+        A new list representing the merged result.
+
+    Raises:
+        AssertionError: If 'list1' and 'list2' do not have the same length.
+
+    Example:
+        >>> l1 = [1, None, 3, None]
+        >>> l2 = [None, 2, None, 4]
+        >>> merge(l1, l2)
+        [1, 2, 3, 4]
+        >>> l3 = [None, 'b', None]
+        >>> l4 = ['a', None, 'c']
+        >>> merge(l3, l4)
+        ['a', 'b', 'c']
+    """
+    assert len(list1) == len(list2)
+    res = []
+    for a, b in zip(list1, list2, strict=False):
+        res.append(b if a is None else a)
+    return res
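
A small, self-contained check of the behavior documented above, including the round-trip invariant noted in merge()'s docstring:

    from torchax.util import merge, partition

    nums = [1, 2, 3, 4, 5, 6]
    evens, odds = partition(nums, lambda n: n % 2 == 0)
    assert evens == [None, 2, None, 4, None, 6]
    assert odds == [1, None, 3, None, 5, None]
    assert merge(evens, odds) == nums  # merge(*partition(xs, p)) == xs
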
torchax/view.py ADDED
@@ -0,0 +1,399 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from enum import Enum
+from typing import Any
+
+import jax
+import torch
+
+# Reference to original PyTorch native functions
+# https://github.com/pytorch/pytorch/blob/main/aten/src/ATen/native/native_functions.yaml
+
+
+class ViewInfoType(Enum):
+    INVALID = 0
+    NARROW = 1
+    NO_OP = 2
+    PERMUTE = 3
+    RESHAPE = 4
+    RESIZE = 5
+    SELECT = 6
+    AS_STRIDED = 7
+    DIAGONAL = 8
+
+
+class ViewInfo(ABC):
+    """
+    Abstract base class for all view operations.
+    Defines the interface for applying and updating view transformations.
+    """
+
+    def __init__(
+        self,
+        view_info_type: ViewInfoType = ViewInfoType.INVALID,
+    ):
+        """
+        Initialize a ViewInfo object.
+
+        Args:
+            view_info_type: The type of view operation
+        """
+        self.view_info_type = view_info_type
+
+    @abstractmethod
+    def update_tensor(self, new_value: jax.Array, jax_array: jax.Array) -> jax.Array:
+        """
+        Apply this view transformation to a JAX array and update its value.
+
+        Args:
+            new_value: The new values to set in the view
+            jax_array: The parent array to update
+
+        Returns:
+            Updated array
+        """
+        pass
+
+    @abstractmethod
+    def transform_tensor(self, jax_array: jax.Array) -> jax.Array:
+        """
+        Apply this view transformation to a JAX array.
+
+        Args:
+            jax_array: The array to transform
+
+        Returns:
+            Transformed array
+        """
+        pass
+
+    @abstractmethod
+    def calculate_output_shape(self, source: jax.Array) -> list[int]:
+        """
+        Calculate the resulting shape after applying this view.
+
+        Args:
+            source: Original jax array before transformation
+
+        Returns:
+            Resulting shape after transformation
+        """
+        pass
+
+
+class NarrowInfo(ViewInfo):
+    """
+    Represents a slicing operation on a tensor.
+    Handles operations like tensor[1:3, :, 2:5:2].
+    """
+
+    def __init__(self, slices: slice | tuple[slice]) -> None:
+        """
+        Args:
+            slices: The slice(s) to apply to the tensor.
+                E.g. jax_array[slices] returns the transformed tensor.
+        """
+        super().__init__(ViewInfoType.NARROW)
+        self.slices = slices
+
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, NarrowInfo):
+            return False
+        return self.slices == other.slices
+
+    def transform_tensor(self, jax_array: jax.Array) -> jax.Array:
+        try:
+            return jax_array[self.slices]
+        except IndexError as e:
+            raise IndexError("Invalid slice operation") from e
+
+    def update_tensor(self, new_value: jax.Array, jax_array: jax.Array) -> jax.Array:
+        return jax_array.at[self.slices].set(new_value)
+
+    def calculate_output_shape(self, source: jax.Array) -> list[int]:
+        return source[self.slices].shape
+
+
+class SelectInfo(ViewInfo):
+    """
+    Represents a selection operation on a tensor.
+    Typically used for indexing operations that select specific elements.
+    """
+
+    def __init__(
+        self, dim: int = 0, start: int = 0, end: int = 0, stride: int = 0
+    ) -> None:
+        super().__init__(ViewInfoType.SELECT)
+        self.dim: int = dim
+        self.start: int = start
+        self.end: int = end
+        self.stride: int = stride
+
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, SelectInfo):
+            return False
+        return (
+            self.dim == other.dim
+            and self.start == other.start
+            and self.end == other.end
+            and self.stride == other.stride
+        )
+
+    def transform_tensor(self, jax_array: jax.Array) -> jax.Array:
+        raise NotImplementedError("SelectInfo.apply not implemented")
+
+    def update_tensor(self, new_value: jax.Array, jax_array: jax.Array) -> jax.Array:
+        raise NotImplementedError("SelectInfo.update not implemented")
+
+    def calculate_output_shape(self, source: jax.Array) -> list[int]:
+        raise NotImplementedError("SelectInfo.calculate_output_shape not implemented")
+
+
+class AsStridedInfo(ViewInfo):
+    """
+    Information for as_strided operations.
+    """
+
+    def __init__(self, stride: list[int], offset: int = 0) -> None:
+        super().__init__(ViewInfoType.AS_STRIDED)
+        self.stride: list[int] = stride
+        self.offset: int = offset
+
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, AsStridedInfo):
+            return False
+        return self.offset == other.offset and self.stride == other.stride
+
+    def transform_tensor(self, jax_array: jax.Array) -> jax.Array:
+        raise NotImplementedError("AsStridedInfo.apply not implemented")
+
+    def update_tensor(self, new_value: jax.Array, jax_array: jax.Array) -> jax.Array:
+        raise NotImplementedError("AsStridedInfo.update not implemented")
+
+    def calculate_output_shape(self, source: jax.Array) -> list[int]:
+        raise NotImplementedError("AsStridedInfo.calculate_output_shape not implemented")
+
+
+class DiagonalInfo(ViewInfo):
+    """
+    Information for diagonal operations.
+    Extracts diagonal elements from a tensor.
+    """
+
+    def __init__(self, offset: int = 0, dim1: int = 0, dim2: int = 1) -> None:
+        """
+        Args:
+            offset: Offset from the main diagonal
+            dim1: First dimension for diagonal extraction
+            dim2: Second dimension for diagonal extraction
+        """
+        super().__init__(ViewInfoType.DIAGONAL)
+        self.offset: int = offset
+        self.dim1: int = dim1
+        self.dim2: int = dim2
+
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, DiagonalInfo):
+            return False
+        return (
+            self.offset == other.offset
+            and self.dim1 == other.dim1
+            and self.dim2 == other.dim2
+        )
+
+    def transform_tensor(self, jax_array: jax.Array) -> jax.Array:
+        raise NotImplementedError("DiagonalInfo.apply not implemented")
+
+    def update_tensor(self, new_value: jax.Array, jax_array: jax.Array) -> jax.Array:
+        raise NotImplementedError("DiagonalInfo.update not implemented")
+
+    def calculate_output_shape(self, source: jax.Array) -> list[int]:
+        raise NotImplementedError("DiagonalInfo.calculate_output_shape not implemented")
+
+
+class View(torch.Tensor):
+    """
+    A View is a reference to another Tensor or another View,
+    with a transformation applied to it.
+    """
+
+    @staticmethod
+    def __new__(
+        cls,
+        parent: torchax.Tensor | View,  # noqa: F821
+        view_info: ViewInfo,
+        env: Any,
+    ) -> View:
+        """
+        Args:
+            parent: Parent tensor or view
+            view_info: Information about the view transformation
+            env: Environment for tensor operations
+        """
+        shape = view_info.calculate_output_shape(parent.jax())
+        return torch.Tensor._make_wrapper_subclass(
+            cls,
+            shape,
+            device="meta",
+            dtype=parent.dtype,
+            requires_grad=False,
+        )
+
+    def __init__(
+        self,
+        parent: torchax.Tensor | View,  # noqa: F821
+        view_info: ViewInfo,
+        env: Any,
+    ) -> None:
+        super().__init__()
+        self.parent = parent
+        self.view_info = view_info
+        self._env = env
+
+    def get_transformation_chain(self) -> list[ViewInfo]:
+        """
+        Get all view transformations from the source tensor to this view.
+        """
+        if isinstance(self.parent, View):
+            transformations = self.parent.get_transformation_chain()
+            transformations.append(self.view_info)
+            return transformations
+        else:
+            return [self.view_info]
+
+    __torch_function__ = torch._C._disabled_torch_function_impl
+
+    def source_jax(self) -> jax.Array:
+        """
+        Returns the source tensor.
+        """
+        if isinstance(self.parent, View):
+            return self.parent.source_jax()
+        else:
+            return self.parent.jax()
+
+    def replace_source_jax(self, new_value: jax.Array) -> None:
+        """
+        Update the source tensor with new values.
+        """
+        if isinstance(self.parent, View):
+            self.parent.replace_source_jax(new_value)
+        else:
+            assert new_value.shape == self.parent._elem.shape
+            self.parent._elem = new_value
+
+    def torch(self) -> torchax.Tensor:  # noqa: F821
+        """
+        Returns a torchax Tensor representing this view after all transformations.
+        """
+        from torchax.tensor import Tensor
+
+        return Tensor(self.jax(), self._env)
+
+    def update(
+        self,
+        new_values: jax.Array | View | torchax.Tensor,  # noqa: F821
+        view_infos: list[ViewInfo] | None = None,
+    ) -> None:
+        """
+        Update this view with new values, propagating changes back to the source.
+        If view_infos is None, the transformation chain from the source tensor
+        is used.
+        """
+        if view_infos is None:
+            view_infos = self.get_transformation_chain()
+
+        # Get the source JAX array
+        source_array = self.source_jax()
+
+        # Get the new value
+        from torchax.tensor import Tensor
+
+        if isinstance(new_values, View) or isinstance(new_values, Tensor):
+            new_values = new_values.jax()
+
+        # Apply all view transformations to the source array
+        # and store intermediate values
+        intermediate_values = [source_array]
+        for view_info in view_infos[:-1]:
+            intermediate_values.append(view_info.transform_tensor(intermediate_values[-1]))
+
+        # TODO: Investigate efficiency of this algorithm
+        # Update the source array with the new value by
+        # applying inverse transformations in reverse order
+        for view_info, parent_array in zip(
+            reversed(view_infos), reversed(intermediate_values), strict=False
+        ):
+            # Apply the inverse transformation to propagate changes back
+            new_values = view_info.update_tensor(new_values, parent_array)
+
+        # Update the source tensor with the new values
+        self.replace_source_jax(new_values)
+
+    @classmethod
+    def __torch_dispatch__(
+        cls,
+        func: Any,
+        types: tuple[Any, ...],
+        args: tuple[Any, ...] = (),
+        kwargs: dict | None = None,
+    ) -> Any:
+        raise AssertionError(
+            "torchax Tensors can only do math within the torchax environment. "
+            "Please wrap your code with `with torchax.default_env()` or "
+            "call torchax.enable_globally() before."
+        )
+
+    def create_sub_view(self, view_info: ViewInfo) -> View:
+        """
+        Create a new view that is a child of this view.
+        """
+        return View(self, view_info, self._env)
+
+    def __str__(self) -> str:
+        return f"View({self.torch()})"
+
+    def jax(self) -> jax.Array:
+        """
+        Returns a copy of the source tensor after transformations.
+        """
+        result = self.source_jax()
+        for view_info in self.get_transformation_chain():
+            result = view_info.transform_tensor(result)
+        return result
+
+    def __setitem__(self, indexes, val):
+        view_infos = self.get_transformation_chain() + [NarrowInfo(indexes)]
+        self.update(view_infos=view_infos, new_values=val)
+
+    def dim(self):
+        return self.ndim
+
+    @property
+    def device(self):
+        return torch.device("jax:0")
+
+    @property
+    def jax_device(self):
+        return self.jax().device
+
+    @property
+    def ndim(self):
+        return len(self.shape)
+
+    __repr__ = __str__
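
A sketch of how these pieces compose: a View holds a parent plus a ViewInfo, jax() replays the transformation chain forward, and update() writes new values back to the source array via .at[...].set(...). The construction below is illustrative and assumes that tensors created under torchax.default_env() are torchax Tensors exposing .jax():

    import jax.numpy as jnp
    import torch
    import torchax
    from torchax.view import NarrowInfo, View

    env = torchax.default_env()
    with env:
        t = torch.zeros(4, 4)  # assumed to be a torchax Tensor inside the env

    v = View(t, NarrowInfo((slice(0, 2), slice(None))), env)  # behaves like t[0:2, :]
    sub = v.create_sub_view(NarrowInfo(slice(0, 1)))          # like t[0:2, :][0:1]

    sub.update(jnp.ones((1, 4)))  # propagated back through the chain to t
    print(t.jax()[0])             # the first row of the source is now ones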