torchax 0.0.4__py3-none-any.whl → 0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of torchax might be problematic.

torchax/ops/jtorch.py CHANGED
@@ -1,8 +1,9 @@
  """Tensor constructor overrides"""
+
  import math
  import collections.abc
  import functools
- from typing import Optional, Sequence
+ from typing import Optional, Sequence, Tuple
  import numpy as np

  import jax
@@ -12,8 +13,10 @@ from jax.experimental.shard_map import shard_map

  import torch
  from torchax.ops.ops_registry import register_torch_function_op
- from torchax.ops import op_base, mappings, jaten
+ from torchax.ops import op_base, mappings, jaten, jimage
  import torchax.tensor
+ from torchax.view import View, NarrowInfo
+ import torch.utils._pytree as pytree


  def register_function(torch_func, **kwargs):
@@ -21,7 +24,8 @@ def register_function(torch_func, **kwargs):


  @register_function(torch.as_tensor, is_jax_function=False, needs_env=True)
- @op_base.convert_dtype(use_default_dtype=False)  # Attempt to infer type from elements
+ @op_base.convert_dtype(
+     use_default_dtype=False)  # Attempt to infer type from elements
  def _as_tensor(data, dtype=None, device=None, env=None):
    if isinstance(data, torch.Tensor):
      return env._to_copy(data, dtype, device)
@@ -33,7 +37,8 @@ def _as_tensor(data, dtype=None, device=None, env=None):


  @register_function(torch.tensor)
- @op_base.convert_dtype(use_default_dtype=False)  # Attempt to infer type from elements
+ @op_base.convert_dtype(
+     use_default_dtype=False)  # Attempt to infer type from elements
  def _tensor(data, *, dtype=None, **kwargs):
    python_types_to_torch_types = {
        bool: jnp.bool,
@@ -57,8 +62,8 @@ def _aten_allclose(input, other, rtol=1e-05, atol=1e-08, equal_nan=False):

  @register_function(torch.angle)
  def _torch_angle(input):
-   if input.dtype.name == 'int64':
-     input = input.astype(jnp.dtype('float32'))
+   if input.dtype.name == "int64":
+     input = input.astype(jnp.dtype("float32"))
    return jnp.angle(input)


@@ -72,19 +77,21 @@ def _torch_argsort(input, dim=-1, descending=False, stable=False):
      # behavior is the same as a jnp array of rank 1
      expanded = True
      input = jnp.expand_dims(input, 0)
-   res = jnp.argsort(input, axis=dim, descending=descending,
-                     stable=stable)
+   res = jnp.argsort(input, axis=dim, descending=descending, stable=stable)
    if expanded:
      res = res.squeeze()
    return res

+
  @register_function(torch.diag)
  def _diag(input, diagonal=0):
    return jnp.diag(input, k=diagonal)

+
  @register_function(torch.einsum)
  @register_function(torch.ops.aten.einsum)
  def _einsum(equation, *operands):
+
    def get_params(*a):
      inner_list = a[0]
      if not isinstance(inner_list, jax.Array):
@@ -95,71 +102,90 @@ def _einsum(equation, *operands):
        A, B = inner_list
        return A, B
    return operands
-   assert isinstance(equation, str), 'Only accept str equation'
+
+   assert isinstance(equation, str), "Only accept str equation"
    filtered_operands = get_params(*operands)
    return jnp.einsum(equation, *filtered_operands)


- def _sdpa_reference(query, key, value, attn_mask=None, dropout_p=0.0,
-                     is_causal=False, scale=None, enable_gqa=False) -> torch.Tensor:
-   L, S = query.size(-2), key.size(-2)
-   scale_factor = 1 / math.sqrt(query.size(-1)) if scale is None else scale
-   attn_bias = torch.zeros(L, S, dtype=query.dtype, device=query.device)
-   if is_causal:
-     assert attn_mask is None
-     temp_mask = torch.ones(L, S, dtype=torch.bool, device=query.device).tril(diagonal=0)
-     attn_bias.masked_fill_(temp_mask.logical_not(), float("-inf"))
-     attn_bias.to(query.dtype)
-   if attn_mask is not None:
-     if attn_mask.dtype == torch.bool:
-       attn_bias.masked_fill_(attn_mask.logical_not(), float("-inf"))
-     else:
-       attn_bias += attn_mask
-   if enable_gqa:
-     key = key.repeat_interleave(query.size(-3)//key.size(-3), -3)
-     value = value.repeat_interleave(query.size(-3)//value.size(-3), -3)
-
-   attn_weight = query @ key.transpose(-2, -1) * scale_factor
-   attn_weight += attn_bias
-   attn_weight = torch.softmax(attn_weight, dim=-1)
-   if dropout_p > 0:
-     attn_weight = torch.dropout(attn_weight, dropout_p, train=True)
-   return attn_weight @ value
+ def _sdpa_reference(
+     query,
+     key,
+     value,
+     attn_mask=None,
+     dropout_p=0.0,
+     is_causal=False,
+     scale=None,
+     enable_gqa=False,
+ ) -> torch.Tensor:
+   L, S = query.size(-2), key.size(-2)
+   scale_factor = 1 / math.sqrt(query.size(-1)) if scale is None else scale
+   attn_bias = torch.zeros(L, S, dtype=query.dtype, device=query.device)
+   if is_causal:
+     assert attn_mask is None
+     temp_mask = torch.ones(
+         L, S, dtype=torch.bool, device=query.device).tril(diagonal=0)
+     attn_bias.masked_fill_(temp_mask.logical_not(), float("-inf"))
+     attn_bias.to(query.dtype)
+   if attn_mask is not None:
+     if attn_mask.dtype == torch.bool:
+       attn_bias.masked_fill_(attn_mask.logical_not(), float("-inf"))
+     else:
+       attn_bias += attn_mask
+   if enable_gqa:
+     key = key.repeat_interleave(query.size(-3) // key.size(-3), -3)
+     value = value.repeat_interleave(query.size(-3) // value.size(-3), -3)
+
+   attn_weight = query @ key.transpose(-2, -1) * scale_factor
+   attn_weight += attn_bias
+   attn_weight = torch.softmax(attn_weight, dim=-1)
+   if dropout_p > 0:
+     attn_weight = torch.dropout(attn_weight, dropout_p, train=True)
+   return attn_weight @ value


  from jax.sharding import PartitionSpec

+
  def _tpu_flash_attention(query, key, value, env):
-   fsdp_partition = PartitionSpec('fsdp')
+   fsdp_partition = PartitionSpec("fsdp")
+
    def wrap_flash_attention(query, key, value):
      block_sizes = flash_attention.BlockSizes(
-       block_b=min(2, query.shape[0]),
-       block_q=min(512, query.shape[2]),
-       block_k_major=min(512, key.shape[2]),
-       block_k=min(512, key.shape[2]),
-       block_q_major_dkv=min(512, query.shape[2]),
-       block_k_major_dkv=min(512, key.shape[2]),
-       block_k_dkv=min(512, key.shape[2]),
-       block_q_dkv=min(512, query.shape[2]),
-       block_k_major_dq=min(512, key.shape[2]),
-       block_k_dq=min(256, key.shape[2]),
-       block_q_dq=min(1024, query.shape[2]),
+         block_b=min(2, query.shape[0]),
+         block_q=min(512, query.shape[2]),
+         block_k_major=min(512, key.shape[2]),
+         block_k=min(512, key.shape[2]),
+         block_q_major_dkv=min(512, query.shape[2]),
+         block_k_major_dkv=min(512, key.shape[2]),
+         block_k_dkv=min(512, key.shape[2]),
+         block_q_dkv=min(512, query.shape[2]),
+         block_k_major_dq=min(512, key.shape[2]),
+         block_k_dq=min(256, key.shape[2]),
+         block_q_dq=min(1024, query.shape[2]),
      )
      return flash_attention.flash_attention(
          query, key, value, causal=True, block_sizes=block_sizes)

    if env.config.shmap_flash_attention:
      wrap_flash_attention = shard_map(
-         wrap_flash_attention,
-         mesh=env._mesh,
-         in_specs=(fsdp_partition, fsdp_partition, fsdp_partition),
-         out_specs=fsdp_partition ,
-         check_rep=False,
+         wrap_flash_attention,
+         mesh=env._mesh,
+         in_specs=(fsdp_partition, fsdp_partition, fsdp_partition),
+         out_specs=fsdp_partition,
+         check_rep=False,
      )
-   #return flash_attn_mapped(query, key, value)
+   # return flash_attn_mapped(query, key, value)
    return wrap_flash_attention(query, key, value)


+ @register_function(torch.nn.functional.one_hot)
+ def one_hot(tensor, num_classes=-1):
+   if num_classes == -1:
+     num_classes = jnp.max(tensor) + 1
+   return jax.nn.one_hot(tensor, num_classes).astype(jnp.int64)
+
+
  @register_function(torch.nn.functional.pad)
  def pad(tensor, pad, mode="constant", value=None):
    # For padding modes that have different names between Torch and NumPy, this
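For reference, the new `one_hot` override above simply delegates to `jax.nn.one_hot` and casts to int64, inferring the class count when `num_classes == -1`. A minimal standalone sketch of that underlying JAX behavior (plain JAX, outside any torchax environment):

import jax
import jax.numpy as jnp

labels = jnp.array([0, 2, 1])
# num_classes == -1 means "infer from the data", as in the override above.
num_classes = int(jnp.max(labels)) + 1
encoded = jax.nn.one_hot(labels, num_classes).astype(jnp.int64)
# encoded == [[1, 0, 0], [0, 0, 1], [0, 1, 0]]
# Note: without jax_enable_x64, the int64 cast is silently narrowed to int32.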
@@ -210,27 +236,60 @@ def pad(tensor, pad, mode="constant", value=None):
    return jnp.pad(tensor[nd_slice], numpy_pad_width, mode=numpy_mode, **kwargs)


- @register_function(torch.nn.functional.scaled_dot_product_attention, is_jax_function=False, needs_env=True)
- @register_function(torch.ops.aten.scaled_dot_product_attention, is_jax_function=False, needs_env=True)
+ @register_function(
+     torch.nn.functional.scaled_dot_product_attention,
+     is_jax_function=False,
+     needs_env=True,
+ )
+ @register_function(
+     torch.ops.aten.scaled_dot_product_attention,
+     is_jax_function=False,
+     needs_env=True)
  def scaled_dot_product_attention(
-     query, key, value, attn_mask=None,
-     dropout_p=0.0, is_causal=False, scale=None, enable_gqa=False, env=None) -> torch.Tensor:
-
-   if env.config.use_tpu_flash_attention:
+     query,
+     key,
+     value,
+     attn_mask=None,
+     dropout_p=0.0,
+     is_causal=False,
+     scale=None,
+     enable_gqa=False,
+     env=None,
+ ) -> torch.Tensor:
+
+   if env.config.use_tpu_flash_attention:
      jquery, jkey, jvalue = env.t2j_iso((query, key, value))
      res = _tpu_flash_attention(jquery, jkey, jvalue, env)
      return env.j2t_iso(res)

-   return _sdpa_reference(query, key, value, attn_mask, dropout_p, is_causal, scale, enable_gqa)
+   return _sdpa_reference(query, key, value, attn_mask, dropout_p, is_causal,
+                          scale, enable_gqa)
+

- @register_function(torch.Tensor.__getitem__)
+ @register_function(
+     torch.Tensor.__getitem__, is_jax_function=False, is_view_op=True)
  def getitem(self, indexes):
+
    if isinstance(indexes, list) and isinstance(indexes[0], int):
      # list of int, i.e. x[[1, 2]] NOT x[1, 2] (the second would be tuple of int)
-     indexes = (indexes, )
+     indexes = (indexes,)
    elif isinstance(indexes, list):
      indexes = tuple(indexes)
-   return self[indexes]
+
+   def is_narrow_slicing():
+     tensor_free = not pytree.tree_any(
+         lambda x: isinstance(x, torch.Tensor) or isinstance(x, jax.Array),
+         indexes)
+     list_free = not isinstance(indexes, tuple) or all(
+         [False if isinstance(x, list) else True for x in indexes])
+     return tensor_free and list_free
+
+   if is_narrow_slicing():
+     return View(self, view_info=NarrowInfo(indexes), env=self._env)
+
+   indexes = self._env.t2j_iso(indexes)
+   return torchax.tensor.Tensor(self._elem[indexes], self._env)
+

  @register_function(torch.corrcoef)
  def _corrcoef(x):
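The reworked `__getitem__` above routes "narrow" indexing (plain ints and slices) to a lazy `View`, and only materializes a new tensor for advanced indexing with tensors or lists. A rough standalone sketch of that predicate, reusing the same pytree helper the diff imports:

import jax
import torch
import torch.utils._pytree as pytree

def is_narrow_slicing(indexes):
  # True when no index is a torch.Tensor / jax.Array and no element of a
  # tuple index is a list, i.e. the result can be expressed as a narrow view.
  tensor_free = not pytree.tree_any(
      lambda x: isinstance(x, (torch.Tensor, jax.Array)), indexes)
  list_free = not isinstance(indexes, tuple) or all(
      not isinstance(x, list) for x in indexes)
  return tensor_free and list_free

print(is_narrow_slicing((slice(0, 2), 3)))      # True  -> View path
print(is_narrow_slicing(torch.tensor([0, 1])))  # False -> materializing path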
@@ -238,15 +297,22 @@ def _corrcoef(x):
      return jnp.corrcoef(x).astype(jnp.float32)
    return jnp.corrcoef(x)

+
  @register_function(torch.sparse.mm, is_jax_function=False)
- def _sparse_mm(mat1, mat2, reduce='sum'):
+ def _sparse_mm(mat1, mat2, reduce="sum"):
    return torch.mm(mat1, mat2)

+
  @register_function(torch.isclose)
  def _aten_isclose(input, other, rtol=1e-05, atol=1e-08, equal_nan=False):
    return jnp.isclose(input, other, rtol, atol, equal_nan)


+ @register_function(torch.linalg.det)
+ def linalg_det(input):
+   return jnp.linalg.det(input)
+
+
  @register_function(torch.ones)
  def _ones(*size: int, dtype=None, **kwargs):
    if len(size) == 1 and isinstance(size[0], collections.abc.Iterable):
@@ -281,23 +347,39 @@ def empty(*size: Sequence[int], dtype=None, **kwargs):
      size = size[0]
    return jnp.empty(size, dtype=dtype)

- @register_function(torch.arange, is_jax_function=False)
+
+ @register_function(torch.arange, is_jax_function=True)
  def arange(
-     start, end=None, step=None,
-     out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False,
-     pin_memory=None,
+     start,
+     end=None,
+     step=None,
+     out=None,
+     dtype=None,
+     layout=torch.strided,
+     device=None,
+     requires_grad=False,
+     pin_memory=None,
  ):
    if end is None:
      end = start
      start = 0
    if step is None:
      step = 1
-   return torch.ops.aten.arange(start, end, step, dtype=dtype)
+   return jaten._aten_arange(start, end, step, dtype=dtype)
+

- @register_function(torch.empty_strided, is_jax_function=False)
+ @register_function(torch.empty_strided, is_jax_function=True)
  def empty_strided(
-     size, stride, *, dtype=None, layout=None, device=None, requires_grad=False, pin_memory=False):
-   return empty(size, dtype=dtype)
+     size,
+     stride,
+     *,
+     dtype=None,
+     layout=None,
+     device=None,
+     requires_grad=False,
+     pin_memory=False,
+ ):
+   return empty(size, dtype=dtype, requires_grad=requires_grad)


  @register_function(torch.unravel_index)
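The rewritten `arange` above keeps torch's single-argument convention: one positional argument is treated as the exclusive end, with start 0 and step 1, before handing off to `jaten._aten_arange`. A small worked sketch of just that defaulting logic in plain JAX:

import jax.numpy as jnp

def arange_like(start, end=None, step=None):
  # torch.arange(5) is equivalent to torch.arange(0, 5, 1)
  if end is None:
    end = start
    start = 0
  if step is None:
    step = 1
  return jnp.arange(start, end, step)

print(arange_like(5))        # [0 1 2 3 4]
print(arange_like(2, 8, 2))  # [2 4 6]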
@@ -305,27 +387,33 @@ def unravel_index(indices, shape):
    return jnp.unravel_index(indices, shape)


- @register_function(torch.rand, is_jax_function=False)
- def rand(
-     *size, **kwargs
- ):
+ @register_function(torch.rand, is_jax_function=True, needs_env=True)
+ def rand(*size, **kwargs):
    if len(size) == 1 and isinstance(size[0], collections.abc.Iterable):
      size = size[0]
-   return torch.ops.aten.rand(size, **kwargs)
+   return jaten._rand(size, **kwargs)
+

- @register_function(torch.randn, is_jax_function=False)
+ @register_function(torch.randn, is_jax_function=True, needs_env=True)
  def randn(
-     *size, generator=None, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False, pin_memory=False
+     *size,
+     generator=None,
+     out=None,
+     dtype=None,
+     layout=torch.strided,
+     device=None,
+     requires_grad=False,
+     pin_memory=False,
+     env=None,
  ):
    if len(size) == 1 and isinstance(size[0], collections.abc.Iterable):
      size = size[0]
-   return torch.ops.aten.randn(size, generator=generator, dtype=dtype)
+   return jaten._aten_randn(size, generator=generator, dtype=dtype, env=env)

- @register_function(torch.randint, is_jax_function=False)
- def randint(
-     *args, **kwargs
- ):
-   return torch.ops.aten.randint(*args, **kwargs)
+
+ @register_function(torch.randint, is_jax_function=False, needs_env=True)
+ def randint(*args, **kwargs):
+   return jaten._aten_randint(*args, **kwargs)


  @register_function(torch.logdet)
@@ -356,14 +444,17 @@ def linalg_solve_ex(a, b):
    res, info = jaten._aten__linalg_solve_ex(a, b)
    return res, info

+
  @register_function(torch.linalg.svd)
  def linalg_svd(a, full_matrices=True):
    return jaten._aten__linalg_svd(a, full_matrices=full_matrices)

+
  @register_function(torch.linalg.matrix_power)
  def matrix_power(A, n, *, out=None):
    return jnp.linalg.matrix_power(A, n)

+
  @register_function(torch.svd)
  def svd(a, some=True, compute_uv=True):
    if not compute_uv:
@@ -374,21 +465,24 @@ def svd(a, some=True, compute_uv=True):
    U, S, V = jaten._aten__linalg_svd(a, full_matrices=not some)
    return U, S, jnp.matrix_transpose(V)

+
  @register_function(torch.cdist)
- def _cdist(x1, x2, p=2.0, compute_mode='use_mm_for_euclid_dist_if_necessary'):
-   return jaten._aten_cdist(x1, x2, p, compute_mode)
+ def _cdist(x1, x2, p=2.0, compute_mode="use_mm_for_euclid_dist_if_necessary"):
+   return jaten._aten_cdist(x1, x2, p, compute_mode)
+

  @register_function(torch.lu)
  def lu(A, **kwargs):
-   lu,pivots,_ = jax.lax.linalg.lu(A)
+   lu, pivots, _ = jax.lax.linalg.lu(A)
    # JAX pivots are offset by 1 compared to torch
    _pivots = pivots + 1
    info_shape = pivots.shape[:-1]
    info = jnp.zeros(info_shape, dtype=mappings.t2j_dtype(torch.int32))
-   if kwargs['get_infos'] == True:
+   if kwargs["get_infos"] == True:
      return lu, _pivots, info
    return lu, _pivots

+
  @register_function(torch.lu_solve)
  def lu_solve(b, LU_data, LU_pivots, **kwargs):
    # JAX pivots are offset by 1 compared to torch
@@ -396,6 +490,7 @@ def lu_solve(b, LU_data, LU_pivots, **kwargs):
    x = jax.scipy.linalg.lu_solve((LU_data, _pivots), b)
    return x

+
  @register_function(torch.linalg.tensorsolve)
  def linalg_tensorsolve(A, b, dims=None):
    # examples:
@@ -425,3 +520,57 @@ def functional_linear(self, weights, bias=None):
    if bias is not None:
      res += bias
    return res
+
+
+ @register_function(torch.nn.functional.interpolate)
+ def functional_interpolate(
+     input,
+     size: Tuple[int, int],
+     scale_factor: Optional[float],
+     mode: str,
+     align_corners: bool,
+     recompute_scale_factor: bool,
+     antialias: bool,
+ ):
+   supported_methods = (
+       "nearest",
+       "linear",
+       "bilinear",
+       "trilinear",
+       "cubic",
+       "bicubic",
+       "tricubic",
+       "lanczos3",
+       "lanczos5",
+   )
+   is_jax_supported = mode in supported_methods
+   if not is_jax_supported:
+     raise torchax.tensor.OperatorNotFound(
+         f"JAX does not support interpolation mode: {mode}. Supported modes are: {supported_methods}"
+     )
+   # None check
+   antialias = antialias or False
+   align_corners = align_corners or False
+
+   if mode in ('cubic', 'bicubic',
+               'tricubic') and not antialias and size is not None:
+     return jimage.interpolate_bicubic_no_aa(
+         input,
+         size[0],
+         size[1],
+         align_corners,
+     )
+   else:
+     # fallback
+     raise torchax.tensor.OperatorNotFound(
+         f"JAX does not support interpolation mode: {mode}. Supported modes are: {supported_methods}"
+     )
+
+
+ @register_function(torch.Tensor.repeat_interleave)
+ def torch_Tensor_repeat_interleave(self,
+                                    repeats,
+                                    dim=None,
+                                    *,
+                                    output_size=None):
+   return jnp.repeat(self, repeats, axis=dim, total_repeat_length=output_size)
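The `repeat_interleave` override added above is a thin wrapper around `jnp.repeat`; torch's keyword-only `output_size` maps to JAX's `total_repeat_length`, which pins the output shape statically when `repeats` is an array. A standalone sketch of that mapping:

import jax.numpy as jnp

x = jnp.array([1, 2, 3])
print(jnp.repeat(x, 2))  # [1 1 2 2 3 3]

repeats = jnp.array([1, 0, 2])
# total_repeat_length gives the op a static output length, the same role
# output_size plays in torch.Tensor.repeat_interleave.
print(jnp.repeat(x, repeats, total_repeat_length=3))  # [1 3 3]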
@@ -53,8 +53,8 @@ def _self_suppression(in_args):
    can_suppress_others = jnp.reshape(
        jnp.max(iou, 1) <= 0.5, [batch_size, -1, 1]).astype(iou.dtype)
    iou_suppressed = jnp.reshape(
-       (jnp.max(can_suppress_others * iou, 1) <= 0.5).astype(iou.dtype),
-       [batch_size, -1, 1]) * iou
+       (jnp.max(can_suppress_others * iou, 1) <= 0.5).astype(
+           iou.dtype), [batch_size, -1, 1]) * iou
    iou_sum_new = jnp.sum(iou_suppressed, [1, 2])
    return iou_suppressed, jnp.any(iou_sum - iou_sum_new > 0.5), iou_sum_new

@@ -65,9 +65,8 @@ def _cross_suppression(in_args):
    new_slice = lax.dynamic_slice(boxes, [0, inner_idx * _NMS_TILE_SIZE, 0],
                                  [batch_size, _NMS_TILE_SIZE, 4])
    iou = _bbox_overlap(new_slice, box_slice)
-   ret_slice = jnp.expand_dims(
-       (jnp.all(iou < iou_threshold, [1])).astype(box_slice.dtype),
-       2) * box_slice
+   ret_slice = jnp.expand_dims((jnp.all(iou < iou_threshold, [1])).astype(
+       box_slice.dtype), 2) * box_slice
    return boxes, ret_slice, iou_threshold, inner_idx + 1


@@ -90,45 +89,40 @@ def _suppression_loop_body(in_args):
    # Iterates over tiles that can possibly suppress the current tile.
    box_slice = lax.dynamic_slice(boxes, [0, idx * _NMS_TILE_SIZE, 0],
                                  [batch_size, _NMS_TILE_SIZE, 4])
+
    def _loop_cond(in_args):
      _, _, _, inner_idx = in_args
      return inner_idx < idx

-   _, box_slice, _, _ = lax.while_loop(
-       _loop_cond,
-       _cross_suppression, (boxes, box_slice, iou_threshold,
-                            0))
+   _, box_slice, _, _ = lax.while_loop(_loop_cond, _cross_suppression,
+                                       (boxes, box_slice, iou_threshold, 0))

    # Iterates over the current tile to compute self-suppression.
    iou = _bbox_overlap(box_slice, box_slice)
    mask = jnp.expand_dims(
-       jnp.reshape(jnp.arange(_NMS_TILE_SIZE), [1, -1]) > jnp.reshape(
-           jnp.arange(_NMS_TILE_SIZE), [-1, 1]), 0)
+       jnp.reshape(jnp.arange(_NMS_TILE_SIZE), [1, -1])
+       > jnp.reshape(jnp.arange(_NMS_TILE_SIZE), [-1, 1]), 0)
    iou *= (jnp.logical_and(mask, iou >= iou_threshold)).astype(iou.dtype)

    def _loop_cond2(in_args):
      _, loop_condition, _ = in_args
      return loop_condition

-   suppressed_iou, _, _ = lax.while_loop(
-       _loop_cond2, _self_suppression,
-       (iou, True,
-        jnp.sum(iou, [1, 2])))
+   suppressed_iou, _, _ = lax.while_loop(_loop_cond2, _self_suppression,
+                                         (iou, True, jnp.sum(iou, [1, 2])))
    suppressed_box = jnp.sum(suppressed_iou, 1) > 0
    box_slice *= jnp.expand_dims(1.0 - suppressed_box.astype(box_slice.dtype), 2)

    # Uses box_slice to update the input boxes.
-   mask = jnp.reshape(
-       (jnp.equal(jnp.arange(num_tiles), idx)).astype(boxes.dtype),
-       [1, -1, 1, 1])
+   mask = jnp.reshape((jnp.equal(jnp.arange(num_tiles),
+                                 idx)).astype(boxes.dtype), [1, -1, 1, 1])
    boxes = jnp.tile(jnp.expand_dims(
        box_slice, 1), [1, num_tiles, 1, 1]) * mask + jnp.reshape(
            boxes, [batch_size, num_tiles, _NMS_TILE_SIZE, 4]) * (1 - mask)
    boxes = jnp.reshape(boxes, [batch_size, -1, 4])

    # Updates output_size.
-   output_size += jnp.sum(
-       jnp.any(box_slice > 0, [2]).astype(jnp.int32), [1])
+   output_size += jnp.sum(jnp.any(box_slice > 0, [2]).astype(jnp.int32), [1])
    return boxes, iou_threshold, output_size, idx + 1


@@ -185,8 +179,8 @@ def non_max_suppression_padded(scores, boxes, max_output_size, iou_threshold):
    """
    batch_size = boxes.shape[0]
    num_boxes = boxes.shape[1]
-   pad = int(jnp.ceil(float(num_boxes) / _NMS_TILE_SIZE)
-            ) * _NMS_TILE_SIZE - num_boxes
+   pad = int(jnp.ceil(
+       float(num_boxes) / _NMS_TILE_SIZE)) * _NMS_TILE_SIZE - num_boxes
    boxes = jnp.pad(boxes.astype(jnp.float32), [[0, 0], [0, pad], [0, 0]])
    scores = jnp.pad(scores.astype(jnp.float32), [[0, 0], [0, pad]])
    num_boxes += pad
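The reformatted padding expression above rounds the box count up to a whole number of NMS tiles so the tiled while-loops always see full tiles. A quick worked example of the arithmetic, assuming a tile size of 512 purely for illustration (the module defines its own `_NMS_TILE_SIZE`):

import math

_NMS_TILE_SIZE = 512  # illustrative value only

num_boxes = 700
pad = int(math.ceil(num_boxes / _NMS_TILE_SIZE)) * _NMS_TILE_SIZE - num_boxes
print(pad)              # 324
print(num_boxes + pad)  # 1024, i.e. two full tiles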
@@ -194,15 +188,12 @@ def non_max_suppression_padded(scores, boxes, max_output_size, iou_threshold):
    def _loop_cond(in_args):
      unused_boxes, unused_threshold, output_size, idx = in_args
      return jnp.logical_and(
-         jnp.min(output_size) < max_output_size,
-         idx < num_boxes // _NMS_TILE_SIZE)
+         jnp.min(output_size) < max_output_size, idx
+         < num_boxes // _NMS_TILE_SIZE)

    selected_boxes, _, output_size, _ = lax.while_loop(
-       _loop_cond, _suppression_loop_body, (
-           boxes, iou_threshold,
-           jnp.zeros([batch_size], jnp.int32),
-           0
-       ))
+       _loop_cond, _suppression_loop_body,
+       (boxes, iou_threshold, jnp.zeros([batch_size], jnp.int32), 0))
    idx = num_boxes - lax.top_k(
        jnp.any(selected_boxes > 0, [2]).astype(jnp.int32) *
        jnp.expand_dims(jnp.arange(num_boxes, 0, -1), 0),
@@ -210,30 +201,28 @@ def non_max_suppression_padded(scores, boxes, max_output_size, iou_threshold):
    idx = jnp.minimum(idx, num_boxes - 1)
    idx = jnp.reshape(
        idx + jnp.reshape(jnp.arange(batch_size) * num_boxes, [-1, 1]), [-1])
-
+
    return idx
-   boxes = jnp.reshape(
-       (jnp.reshape(boxes, [-1, 4]))[idx],
-       [batch_size, max_output_size, 4])
-   boxes = boxes * (
-       jnp.reshape(jnp.arange(max_output_size), [1, -1, 1]) < jnp.reshape(
-           output_size, [-1, 1, 1])).astype(boxes.dtype)
+   boxes = jnp.reshape((jnp.reshape(boxes, [-1, 4]))[idx],
+                       [batch_size, max_output_size, 4])
+   boxes = boxes * (jnp.reshape(jnp.arange(max_output_size), [1, -1, 1])
+                    < jnp.reshape(output_size, [-1, 1, 1])).astype(boxes.dtype)
    scores = jnp.reshape(
-       jnp.reshape(scores, [-1, 1])[idx],
-       [batch_size, max_output_size])
-   scores = scores * (
-       jnp.reshape(jnp.arange(max_output_size), [1, -1]) < jnp.reshape(
-           output_size, [-1, 1])).astype(scores.dtype)
+       jnp.reshape(scores, [-1, 1])[idx], [batch_size, max_output_size])
+   scores = scores * (jnp.reshape(jnp.arange(max_output_size), [1, -1])
+                      < jnp.reshape(output_size, [-1, 1])).astype(scores.dtype)
    return scores, boxes


  # registry:

+
  def nms(boxes, scores, iou_threshold):
    max_output_size = boxes.shape[0]
    boxes = boxes.reshape((1, *boxes.shape))
    scores = scores.reshape((1, *scores.shape))
-   res = non_max_suppression_padded(scores, boxes, max_output_size, iou_threshold)
+   res = non_max_suppression_padded(scores, boxes, max_output_size,
+                                    iou_threshold)
    return res


@@ -242,4 +231,4 @@ try:
    import torchvision
    ops_registry.register_torch_dispatch_op(torch.ops.torchvision.nms, nms)
  except Exception:
-   pass
+   pass