torchax 0.0.4__py3-none-any.whl → 0.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- torchax/CONTRIBUTING.md +2 -2
- torchax/__init__.py +57 -19
- torchax/amp.py +333 -0
- torchax/config.py +19 -12
- torchax/decompositions.py +663 -195
- torchax/device_module.py +7 -1
- torchax/distributed.py +55 -60
- torchax/export.py +26 -17
- torchax/flax.py +39 -0
- torchax/interop.py +275 -141
- torchax/mesh_util.py +211 -0
- torchax/ops/jaten.py +1718 -1294
- torchax/ops/jax_reimplement.py +23 -21
- torchax/ops/jc10d.py +5 -4
- torchax/ops/jimage.py +113 -0
- torchax/ops/jlibrary.py +9 -2
- torchax/ops/jtorch.py +219 -78
- torchax/ops/jtorchvision_nms.py +32 -43
- torchax/ops/mappings.py +77 -35
- torchax/ops/op_base.py +59 -32
- torchax/ops/ops_registry.py +40 -35
- torchax/tensor.py +417 -275
- torchax/train.py +38 -41
- torchax/util.py +88 -0
- torchax/view.py +377 -0
- {torchax-0.0.4.dist-info → torchax-0.0.5.dist-info}/METADATA +111 -145
- torchax-0.0.5.dist-info/RECORD +32 -0
- torchax/environment.py +0 -2
- torchax-0.0.4.dist-info/RECORD +0 -27
- {torchax-0.0.4.dist-info → torchax-0.0.5.dist-info}/WHEEL +0 -0
- {torchax-0.0.4.dist-info → torchax-0.0.5.dist-info}/licenses/LICENSE +0 -0
torchax/device_module.py
CHANGED
@@ -1,20 +1,26 @@
 def _is_in_bad_fork():
   return False
 
+
 def manual_seed_all(seed):
   pass
 
+
 def device_count():
   return 1
 
+
 def get_rng_state():
   return []
 
+
 def set_rng_state(new_state, device):
   pass
 
+
 def is_available():
   return True
 
+
 def current_device():
-  return 0
+  return 0
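The functions above form the minimal device-module surface PyTorch expects from a custom backend. A hedged sketch of how such a module is typically hooked up follows; the wiring below is an illustration, not necessarily how torchax/__init__.py does it.

# Hedged sketch: registering a device module like the one above with PyTorch's
# custom-backend hooks. Assumption: torchax has not already registered it on
# import, in which case torch._register_device_module raises.
import torch
import torchax.device_module as device_module

torch._register_device_module("jax", device_module)

print(torch.jax.is_available())   # True, from is_available() above
print(torch.jax.device_count())   # 1, from device_count() above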
torchax/distributed.py
CHANGED
@@ -51,64 +51,61 @@ class ProcessGroupJax(ProcessGroup):
 
   @staticmethod
   def _work(
-
+      tensors: Union[torch.Tensor, List[torch.Tensor],
+                     List[List[torch.Tensor]]],
   ) -> dist.Work:
     fut = torch.futures.Future()
     fut.set_result(tensors)
     return torch._C._distributed_c10d._create_work_from_future(fut)
 
   def _allgather_base(
-
-
-
-
+      self,
+      output: torch.Tensor,
+      input: torch.Tensor,
+      opts=...,
   ) -> dist.Work:
     assert isinstance(input, torchax.tensor.Tensor)
     assert isinstance(output, torchax.tensor.Tensor)
     torch.distributed._functional_collectives.all_gather_tensor_inplace(
-
-    )
+        output, input, group=self)
     return self._work(output)
 
   def allreduce(
-
-
-
+      self,
+      tensors: List[torch.Tensor],
+      opts: dist.AllreduceOptions = ...,
   ) -> dist.Work:
     assert len(tensors) == 1
     assert isinstance(tensors[0], torchax.tensor.Tensor)
     torch.distributed._functional_collectives.all_reduce_inplace(
-
-
-
-
-      self,
+        tensors[0],
+        torch.distributed._functional_collectives.REDUCE_OP_TO_STR[
+            opts.reduceOp.op],
+        self,
     )
 
     return self._work(tensors)
 
   def broadcast(
-
-
-
+      self,
+      tensors: List[torch.Tensor],
+      opts: dist.BroadcastOptions = ...,
   ) -> dist.Work:
     assert len(tensors) == 1
     assert isinstance(tensors[0], torchax.tensor.Tensor)
     tensors[0].copy_(
-
-
-    )
-    )
+        torch.distributed._functional_collectives.broadcast(
+            tensors[0], opts.rootRank, group=self))
 
     return self._work(tensors)
 
 
-dist.Backend.register_backend("jax", ProcessGroupJax)
+dist.Backend.register_backend("jax", ProcessGroupJax, devices=["jax"])
 
 
-def jax_rendezvous_handler(
-
-):
+def jax_rendezvous_handler(url: str,
+                           timeout: datetime.timedelta = ...,
+                           **kwargs):
   """Initialize distributed store with JAX process IDs.
 
   Requires `$MASTER_ADDR` and `$MASTER_PORT`.
@@ -120,10 +117,10 @@ def jax_rendezvous_handler(
   master_port = int(os.environ["MASTER_PORT"])
   # TODO(wcromar): Use `torchrun`'s store if available
   store = dist.TCPStore(
-
-
-
-
+      master_ip,
+      master_port,
+      jax.process_count(),
+      is_master=jax.process_index() == 0,
   )
 
   yield (store, jax.process_index(), jax.process_count())
@@ -145,9 +142,9 @@ def spawn(f, args=(), env: Optional[torchax.tensor.Environment] = None):
     torch_outputs = f(index, *args)
     return env.t2j_iso(torch_outputs)
 
-  jax_outputs = jax.pmap(
-
-
+  jax_outputs = jax.pmap(
+      jax_wrapper, axis_name="torch_dist")(np.arange(jax.device_count()),
+                                           env.t2j_iso(args))
   return env.j2t_iso(jax_outputs)
 
 
@@ -172,11 +169,12 @@ class DistributedDataParallel(torch.nn.Module):
   jax_output = jax_model(jax_data)
   ```
   """
+
   def __init__(
-
-
-
-
+      self,
+      module: torch.nn.Module,
+      env: Optional[torchax.tensor.Environment] = None,
+      **kwargs,
   ):
     if kwargs:
       logging.warning(f"Unsupported kwargs {kwargs}")
@@ -184,17 +182,15 @@ class DistributedDataParallel(torch.nn.Module):
     super().__init__()
     self._env = env or torchax.default_env()
     self._mesh = Mesh(
-
-
+        mesh_utils.create_device_mesh((jax.device_count(),)),
+        axis_names=("batch",),
     )
     replicated_state = torch_pytree.tree_map_only(
-
-
-
-
-    )
-    ),
-    module.state_dict(),
+        torch.Tensor,
+        lambda t: self._env.j2t_iso(
+            jax.device_put(
+                self._env.to_xla(t)._elem, NamedSharding(self._mesh, P()))),
+        module.state_dict(),
     )
     # TODO: broadcast
     module.load_state_dict(replicated_state, assign=True)
@@ -208,25 +204,24 @@ class DistributedDataParallel(torch.nn.Module):
     global_batch_shape = (global_batch_size,) + inp.shape[1:]
 
     sharding = NamedSharding(self._mesh, P("batch"))
-    return self._env.j2t_iso(
-
-
-
-
-
-
-
-
-    ))
+    return self._env.j2t_iso(
+        jax.make_array_from_single_device_arrays(
+            global_batch_shape,
+            NamedSharding(self._mesh, P("batch")),
+            arrays=[
+                jax.device_put(self._env.to_xla(batch)._elem, device) for batch,
+                device in zip(per_replica_batches, sharding.addressable_devices)
+            ],
+        ))
 
   def replicate_input(self, inp):
     return self._env.j2t_iso(
-
-    )
+        jax.device_put(inp._elem, NamedSharding(self._mesh, P())))
 
   def jit_step(self, func):
-
-
+
+    @functools.partial(
+        interop.jax_jit, kwargs_for_jax_jit={'donate_argnums': 0})
    def _jit_fn(states, args):
      self.load_state_dict(states)
      outputs = func(*args)
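For context, a hedged usage sketch of the DistributedDataParallel wrapper touched above, pieced together only from the signatures and docstring fragment visible in this hunk; the model, shapes, and the assumption that the wrapper's forward simply delegates to the wrapped module are illustrative.

# Hedged sketch built from what this diff shows: __init__(module, env),
# replicate_input(), and the docstring's `jax_output = jax_model(jax_data)`.
import torch
import torchax
from torchax.distributed import DistributedDataParallel

env = torchax.default_env()
model = torch.nn.Linear(16, 4)

ddp = DistributedDataParallel(model, env=env)   # params replicated over the device mesh
inp = env.to_xla(torch.randn(8, 16))            # move the input into the torchax environment
replicated = ddp.replicate_input(inp)           # placed with NamedSharding(mesh, P())
out = ddp(replicated)                           # assumption: forward delegates to `model`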
torchax/export.py
CHANGED
@@ -4,14 +4,14 @@ import copy
 from typing import Any, Dict, Tuple
 import torch
 from torch.utils import _pytree as pytree
+import torchax
 from torchax import tensor
-from torchax.ops import ops_registry
+from torchax.ops import ops_registry, mappings
 from torchax import decompositions
 import jax
 import jax.export
 import sympy
 
-
 DEBUG = False
 
 
@@ -83,7 +83,8 @@ def exported_program_to_jax(exported_program, export_raw: bool = False):
   if torch.__version__ >= '2.2':
     # torch version 2.1 didn't expose this yet
     exported_program = exported_program.run_decompositions()
-    exported_program = exported_program.run_decompositions(
+    exported_program = exported_program.run_decompositions(
+        decompositions.DECOMPOSITIONS)
   if DEBUG:
     print(exported_program.graph_module.code)
 
@@ -108,8 +109,8 @@ def exported_program_to_jax(exported_program, export_raw: bool = False):
 
   if export_raw:
     return names, states, func
-
-  states =
+  env = torchax.default_env()
+  states = env.t2j_copy(states)
   return states, func
 
 
@@ -121,34 +122,35 @@ def extract_avals(exported):
   def _to_aval(arg_meta, symbolic_shapes):
     """Convet from torch type to jax abstract value for export tracing
     """
+
     def _get_dim(d):
       if isinstance(d, torch.SymInt):
         return symbolic_shapes[str(d)]
       return d
 
     val = arg_meta['val']
-    is_scalar = isinstance(val, float) or isinstance(val, int) or isinstance(
+    is_scalar = isinstance(val, float) or isinstance(val, int) or isinstance(
+        val, bool)
     if is_scalar:
       return jax.ShapeDtypeStruct([], type(arg_meta['val']))
 
     tensor_meta = arg_meta['tensor_meta']
     shape = [_get_dim(d) for d in tensor_meta.shape]
-    return jax.ShapeDtypeStruct(shape,
+    return jax.ShapeDtypeStruct(shape, mappings.t2j_dtype(tensor_meta.dtype))
 
   def _get_inputs(exported):
     """Return placeholders with input metadata"""
     placeholders = [p for p in exported.graph.nodes if p.op == "placeholder"]
     input_placeholders = [
-
-
-        if s.kind == torch.export.graph_signature.InputKind.USER_INPUT
+        p for p, s in zip(placeholders, exported.graph_signature.input_specs)
+        if s.kind == torch.export.graph_signature.InputKind.USER_INPUT
     ]
     return input_placeholders
 
   def _build_symbolic_shapes(range_constraints):
     """Convert torch SymInt to JAX symbolic_shape and stores in a map using the
     string name of the torch symbolic int.
-
+
     TODO: There is probably a better way of storing a key for a symbolic int.
     This value needs to be looked up again in `_to_aval` to figure out which
     JAX symbolic to map to for a given torch tensor.
@@ -163,8 +165,10 @@ def extract_avals(exported):
     torch.export.Dim("a", min=5, max=10)
     ==> ("a >= 5", "a <= 10",)
     """
-    if not isinstance(torch_constraint, torch.utils._sympy.value_ranges.
-
+    if not isinstance(torch_constraint, torch.utils._sympy.value_ranges.
+                      ValueRanges) or torch_constraint.is_bool:
+      raise TypeError(
+          f"No symbolic constraint handler for: {torch_constraint}")
 
     constraints = []
     symbol = sympy.Symbol(symbol_name)
@@ -182,7 +186,7 @@ def extract_avals(exported):
     There are two possible sympy `sym` inputs:
     1. Symbol - (s0) These can have custom constraints.
     2. Expr - (s0*2) These apply the expr to s0's constraints, cannot override.
-
+
     Currently support is limited to operations with a symbol and and int,
     in `torch/export/dynamic_shapes.py`:
     "Only increasing linear operations with integer coefficients are supported."
@@ -190,7 +194,8 @@ def extract_avals(exported):
     symbol_name = str(sym)
     constraints = _build_symbolic_constraints(symbol_name, constraint)
     if sym.is_symbol:
-      symbolic_shape = jax.export.symbolic_shape(
+      symbolic_shape = jax.export.symbolic_shape(
+          symbol_name, constraints=constraints)
     else:
       assert len(sym.free_symbols) > 0
       scope = free_symbols[str(list(sym.free_symbols)[0])].scope
@@ -203,8 +208,12 @@ def extract_avals(exported):
   # integer compuations on symbol variables, so each symbol variable is OK to
   # have its own scope.
   symbolic_shapes = {}
-  symbol_variables = [
-
+  symbol_variables = [
+      (s, v) for s, v in range_constraints.items() if s.is_symbol
+  ]
+  symbol_exprs = [
+      (s, v) for s, v in range_constraints.items() if not s.is_symbol
+  ]
   for sym, constraint in symbol_variables + symbol_exprs:
     symbolic_shape = _build_symbolic_shape(sym, constraint, symbolic_shapes)
     symbolic_shapes[str(sym)] = symbolic_shape
torchax/flax.py
ADDED
@@ -0,0 +1,39 @@
+"""Flax interop."""
+
+import torch
+import torchax as tx
+import torchax.interop
+
+
+class FlaxNNModule(torch.nn.Module):
+
+  def __init__(self, env, flax_module, sample_args, sample_kwargs=None):
+    super().__init__()
+    prng = env.prng_key
+    sample_kwargs = sample_kwargs or {}
+    parameter_dict = tx.interop.call_jax(flax_module.init, prng, *sample_args,
+                                         **sample_kwargs)
+
+    self._params = self._encode_nested_dict(parameter_dict)
+
+    self._flax_module = flax_module
+
+  def _encode_nested_dict(self, nested_dict):
+    child_module = torch.nn.Module()
+    for k, v in nested_dict.items():
+      if isinstance(v, dict):
+        child_module.add_module(k, self._encode_nested_dict(v))
+      else:
+        child_module.register_parameter(k, torch.nn.Parameter(v))
+    return child_module
+
+  def _decode_nested_dict(self, child_module):
+    result = dict(child_module.named_parameters(recurse=False))
+    for k, v in child_module.named_children():
+      result[k] = self._decode_nested_dict(v)
+    return result
+
+  def forward(self, *args, **kwargs):
+    nested_dict_params = self._decode_nested_dict(self._params)
+    return tx.interop.call_jax(self._flax_module.apply, nested_dict_params,
+                               *args, **kwargs)
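A hedged usage sketch for the new wrapper, based only on the constructor shown above; the Linen model, the shapes, and the use of the environment as a context manager with a "jax" device are assumptions for illustration.

# Hedged sketch: wrapping a Flax Linen module so it behaves like a torch.nn.Module.
# Assumptions: flax is installed, the default env works as a context manager, and
# sample_args should already be torchax ("jax" device) tensors.
import flax.linen as nn
import torch
import torchax
from torchax.flax import FlaxNNModule

env = torchax.default_env()
dense = nn.Dense(features=8)

with env:
  sample = torch.randn(2, 16, device="jax")
  wrapped = FlaxNNModule(env, dense, (sample,))
  out = wrapped(sample)   # forward() calls dense.apply via interop.call_jax
  print(out.shape)        # torch.Size([2, 8])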