warp-lang 1.0.0b5-py3-none-manylinux2014_x86_64.whl → 1.0.0b6-py3-none-manylinux2014_x86_64.whl

This diff compares the contents of two publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
Files changed (187)
  1. docs/conf.py +3 -4
  2. examples/env/env_ant.py +1 -1
  3. examples/env/env_cartpole.py +1 -1
  4. examples/env/env_humanoid.py +1 -1
  5. examples/example_dem.py +28 -26
  6. examples/example_diffray.py +37 -30
  7. examples/example_fluid.py +7 -3
  8. examples/example_jacobian_ik.py +1 -1
  9. examples/example_mesh_intersect.py +10 -7
  10. examples/example_nvdb.py +3 -3
  11. examples/example_render_opengl.py +19 -10
  12. examples/example_sim_cartpole.py +9 -5
  13. examples/example_sim_cloth.py +29 -25
  14. examples/example_sim_fk_grad.py +2 -2
  15. examples/example_sim_fk_grad_torch.py +3 -3
  16. examples/example_sim_grad_bounce.py +11 -8
  17. examples/example_sim_grad_cloth.py +12 -9
  18. examples/example_sim_granular.py +2 -2
  19. examples/example_sim_granular_collision_sdf.py +13 -13
  20. examples/example_sim_neo_hookean.py +3 -3
  21. examples/example_sim_particle_chain.py +2 -2
  22. examples/example_sim_quadruped.py +8 -5
  23. examples/example_sim_rigid_chain.py +8 -5
  24. examples/example_sim_rigid_contact.py +13 -10
  25. examples/example_sim_rigid_fem.py +2 -2
  26. examples/example_sim_rigid_gyroscopic.py +2 -2
  27. examples/example_sim_rigid_kinematics.py +1 -1
  28. examples/example_sim_trajopt.py +3 -2
  29. examples/fem/example_apic_fluid.py +5 -7
  30. examples/fem/example_diffusion_mgpu.py +18 -16
  31. warp/__init__.py +3 -2
  32. warp/bin/warp.so +0 -0
  33. warp/build_dll.py +29 -9
  34. warp/builtins.py +206 -7
  35. warp/codegen.py +58 -38
  36. warp/config.py +3 -1
  37. warp/context.py +234 -128
  38. warp/fem/__init__.py +2 -2
  39. warp/fem/cache.py +2 -1
  40. warp/fem/field/nodal_field.py +18 -17
  41. warp/fem/geometry/hexmesh.py +11 -6
  42. warp/fem/geometry/quadmesh_2d.py +16 -12
  43. warp/fem/geometry/tetmesh.py +19 -8
  44. warp/fem/geometry/trimesh_2d.py +18 -7
  45. warp/fem/integrate.py +341 -196
  46. warp/fem/quadrature/__init__.py +1 -1
  47. warp/fem/quadrature/pic_quadrature.py +138 -53
  48. warp/fem/quadrature/quadrature.py +81 -9
  49. warp/fem/space/__init__.py +1 -1
  50. warp/fem/space/basis_space.py +169 -51
  51. warp/fem/space/grid_2d_function_space.py +2 -2
  52. warp/fem/space/grid_3d_function_space.py +2 -2
  53. warp/fem/space/hexmesh_function_space.py +2 -2
  54. warp/fem/space/partition.py +9 -6
  55. warp/fem/space/quadmesh_2d_function_space.py +2 -2
  56. warp/fem/space/shape/cube_shape_function.py +27 -15
  57. warp/fem/space/shape/square_shape_function.py +29 -18
  58. warp/fem/space/tetmesh_function_space.py +2 -2
  59. warp/fem/space/topology.py +10 -0
  60. warp/fem/space/trimesh_2d_function_space.py +2 -2
  61. warp/fem/utils.py +10 -5
  62. warp/native/array.h +49 -8
  63. warp/native/builtin.h +31 -14
  64. warp/native/cuda_util.cpp +8 -3
  65. warp/native/cuda_util.h +1 -0
  66. warp/native/exports.h +1177 -1108
  67. warp/native/intersect.h +4 -4
  68. warp/native/intersect_adj.h +8 -8
  69. warp/native/mat.h +65 -6
  70. warp/native/mesh.h +126 -5
  71. warp/native/quat.h +28 -4
  72. warp/native/vec.h +76 -14
  73. warp/native/warp.cu +1 -6
  74. warp/render/render_opengl.py +261 -109
  75. warp/sim/import_mjcf.py +13 -7
  76. warp/sim/import_urdf.py +14 -14
  77. warp/sim/inertia.py +17 -18
  78. warp/sim/model.py +67 -67
  79. warp/sim/render.py +1 -1
  80. warp/sparse.py +6 -6
  81. warp/stubs.py +19 -81
  82. warp/tape.py +1 -1
  83. warp/tests/__main__.py +3 -6
  84. warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
  85. warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
  86. warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
  87. warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
  88. warp/tests/aux_test_unresolved_func.py +14 -0
  89. warp/tests/aux_test_unresolved_symbol.py +14 -0
  90. warp/tests/{test_kinematics.py → disabled_kinematics.py} +10 -12
  91. warp/tests/run_coverage_serial.py +31 -0
  92. warp/tests/test_adam.py +102 -106
  93. warp/tests/test_arithmetic.py +39 -40
  94. warp/tests/test_array.py +46 -48
  95. warp/tests/test_array_reduce.py +25 -19
  96. warp/tests/test_atomic.py +62 -26
  97. warp/tests/test_bool.py +16 -11
  98. warp/tests/test_builtins_resolution.py +1292 -0
  99. warp/tests/test_bvh.py +9 -12
  100. warp/tests/test_closest_point_edge_edge.py +53 -57
  101. warp/tests/test_codegen.py +164 -134
  102. warp/tests/test_compile_consts.py +13 -19
  103. warp/tests/test_conditional.py +30 -32
  104. warp/tests/test_copy.py +9 -12
  105. warp/tests/test_ctypes.py +90 -98
  106. warp/tests/test_dense.py +20 -14
  107. warp/tests/test_devices.py +34 -35
  108. warp/tests/test_dlpack.py +74 -75
  109. warp/tests/test_examples.py +215 -97
  110. warp/tests/test_fabricarray.py +15 -21
  111. warp/tests/test_fast_math.py +14 -11
  112. warp/tests/test_fem.py +280 -97
  113. warp/tests/test_fp16.py +19 -15
  114. warp/tests/test_func.py +177 -194
  115. warp/tests/test_generics.py +71 -77
  116. warp/tests/test_grad.py +83 -32
  117. warp/tests/test_grad_customs.py +7 -9
  118. warp/tests/test_hash_grid.py +6 -10
  119. warp/tests/test_import.py +9 -23
  120. warp/tests/test_indexedarray.py +19 -21
  121. warp/tests/test_intersect.py +15 -9
  122. warp/tests/test_large.py +17 -19
  123. warp/tests/test_launch.py +14 -17
  124. warp/tests/test_lerp.py +63 -63
  125. warp/tests/test_lvalue.py +84 -35
  126. warp/tests/test_marching_cubes.py +9 -13
  127. warp/tests/test_mat.py +388 -3004
  128. warp/tests/test_mat_lite.py +9 -12
  129. warp/tests/test_mat_scalar_ops.py +2889 -0
  130. warp/tests/test_math.py +10 -11
  131. warp/tests/test_matmul.py +104 -100
  132. warp/tests/test_matmul_lite.py +72 -98
  133. warp/tests/test_mesh.py +35 -32
  134. warp/tests/test_mesh_query_aabb.py +18 -25
  135. warp/tests/test_mesh_query_point.py +39 -23
  136. warp/tests/test_mesh_query_ray.py +9 -21
  137. warp/tests/test_mlp.py +8 -9
  138. warp/tests/test_model.py +89 -93
  139. warp/tests/test_modules_lite.py +15 -25
  140. warp/tests/test_multigpu.py +87 -114
  141. warp/tests/test_noise.py +10 -12
  142. warp/tests/test_operators.py +14 -21
  143. warp/tests/test_options.py +10 -11
  144. warp/tests/test_pinned.py +16 -18
  145. warp/tests/test_print.py +16 -20
  146. warp/tests/test_quat.py +121 -88
  147. warp/tests/test_rand.py +12 -13
  148. warp/tests/test_reload.py +27 -32
  149. warp/tests/test_rounding.py +7 -10
  150. warp/tests/test_runlength_encode.py +105 -106
  151. warp/tests/test_smoothstep.py +8 -9
  152. warp/tests/test_snippet.py +13 -22
  153. warp/tests/test_sparse.py +30 -29
  154. warp/tests/test_spatial.py +179 -174
  155. warp/tests/test_streams.py +100 -107
  156. warp/tests/test_struct.py +98 -67
  157. warp/tests/test_tape.py +11 -17
  158. warp/tests/test_torch.py +89 -86
  159. warp/tests/test_transient_module.py +9 -12
  160. warp/tests/test_types.py +328 -50
  161. warp/tests/test_utils.py +217 -218
  162. warp/tests/test_vec.py +133 -2133
  163. warp/tests/test_vec_lite.py +8 -11
  164. warp/tests/test_vec_scalar_ops.py +2099 -0
  165. warp/tests/test_volume.py +391 -382
  166. warp/tests/test_volume_write.py +122 -135
  167. warp/tests/unittest_serial.py +35 -0
  168. warp/tests/unittest_suites.py +291 -0
  169. warp/tests/{test_base.py → unittest_utils.py} +138 -25
  170. warp/tests/{test_misc.py → unused_test_misc.py} +13 -5
  171. warp/tests/{test_debug.py → walkthough_debug.py} +2 -15
  172. warp/thirdparty/unittest_parallel.py +257 -54
  173. warp/types.py +119 -98
  174. warp/utils.py +14 -0
  175. {warp_lang-1.0.0b5.dist-info → warp_lang-1.0.0b6.dist-info}/METADATA +2 -1
  176. {warp_lang-1.0.0b5.dist-info → warp_lang-1.0.0b6.dist-info}/RECORD +182 -178
  177. {warp_lang-1.0.0b5.dist-info → warp_lang-1.0.0b6.dist-info}/WHEEL +1 -1
  178. warp/tests/test_all.py +0 -239
  179. warp/tests/test_conditional_unequal_types_kernels.py +0 -14
  180. warp/tests/test_coverage.py +0 -38
  181. warp/tests/test_unresolved_func.py +0 -7
  182. warp/tests/test_unresolved_symbol.py +0 -7
  183. /warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
  184. /warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
  185. /warp/tests/{test_square.py → aux_test_square.py} +0 -0
  186. {warp_lang-1.0.0b5.dist-info → warp_lang-1.0.0b6.dist-info}/LICENSE.md +0 -0
  187. {warp_lang-1.0.0b5.dist-info → warp_lang-1.0.0b6.dist-info}/top_level.txt +0 -0
warp/fem/space/shape/square_shape_function.py CHANGED
@@ -25,6 +25,7 @@ class SquareBipolynomialShapeFunctions:
         self.LOBATTO_COORDS = wp.constant(NodeVec(lobatto_coords))
         self.LOBATTO_WEIGHT = wp.constant(NodeVec(lobatto_weight))
         self.LAGRANGE_SCALE = wp.constant(NodeVec(lagrange_scale))
+        self.ORDER_PLUS_ONE = wp.constant(self.ORDER + 1)

     @property
     def name(self) -> str:
@@ -93,13 +94,21 @@ class SquareBipolynomialShapeFunctions:
         ):
             return 0.5

+        def trace_node_quadrature_weight_open(
+            node_index_in_elt: int,
+        ):
+            return 0.0
+
+        if not is_closed(self.family):
+            return cache.get_func(trace_node_quadrature_weight_open, self.name)
+
         if ORDER == 1:
             return cache.get_func(trace_node_quadrature_weight_linear, self.name)

         return cache.get_func(trace_node_quadrature_weight, self.name)

     def make_element_inner_weight(self):
-        ORDER = self.ORDER
+        ORDER_PLUS_ONE = self.ORDER_PLUS_ONE
         LOBATTO_COORDS = self.LOBATTO_COORDS
         LAGRANGE_SCALE = self.LAGRANGE_SCALE

@@ -107,11 +116,11 @@ class SquareBipolynomialShapeFunctions:
             coords: Coords,
             node_index_in_elt: int,
         ):
-            node_i = node_index_in_elt // (ORDER + 1)
-            node_j = node_index_in_elt - (ORDER + 1) * node_i
+            node_i = node_index_in_elt // ORDER_PLUS_ONE
+            node_j = node_index_in_elt - ORDER_PLUS_ONE * node_i

             w = float(1.0)
-            for k in range(ORDER + 1):
+            for k in range(ORDER_PLUS_ONE):
                 if k != node_i:
                     w *= coords[0] - LOBATTO_COORDS[k]
                 if k != node_j:
@@ -131,13 +140,13 @@ class SquareBipolynomialShapeFunctions:
             wy = (1.0 - coords[1]) * (1.0 - v[1]) + v[1] * coords[1]
             return wx * wy

-        if ORDER == 1:
+        if self.ORDER == 1 and is_closed(self.family):
             return cache.get_func(element_inner_weight_linear, self.name)

         return cache.get_func(element_inner_weight, self.name)

     def make_element_inner_weight_gradient(self):
-        ORDER = self.ORDER
+        ORDER_PLUS_ONE = self.ORDER_PLUS_ONE
         LOBATTO_COORDS = self.LOBATTO_COORDS
         LAGRANGE_SCALE = self.LAGRANGE_SCALE

@@ -145,12 +154,12 @@ class SquareBipolynomialShapeFunctions:
             coords: Coords,
             node_index_in_elt: int,
         ):
-            node_i = node_index_in_elt // (ORDER + 1)
-            node_j = node_index_in_elt - (ORDER + 1) * node_i
+            node_i = node_index_in_elt // ORDER_PLUS_ONE
+            node_j = node_index_in_elt - ORDER_PLUS_ONE * node_i

             prefix_x = float(1.0)
             prefix_y = float(1.0)
-            for k in range(ORDER + 1):
+            for k in range(ORDER_PLUS_ONE):
                 if k != node_i:
                     prefix_y *= coords[0] - LOBATTO_COORDS[k]
                 if k != node_j:
@@ -159,7 +168,7 @@ class SquareBipolynomialShapeFunctions:
             grad_x = float(0.0)
             grad_y = float(0.0)

-            for k in range(ORDER + 1):
+            for k in range(ORDER_PLUS_ONE):
                 if k != node_i:
                     delta_x = coords[0] - LOBATTO_COORDS[k]
                     grad_x = grad_x * delta_x + prefix_x
@@ -187,7 +196,7 @@ class SquareBipolynomialShapeFunctions:

             return wp.vec2(dx * wy, dy * wx)

-        if ORDER == 1:
+        if self.ORDER == 1 and is_closed(self.family):
             return cache.get_func(element_inner_weight_gradient_linear, self.name)

         return cache.get_func(element_inner_weight_gradient, self.name)
@@ -230,6 +239,7 @@ class SquareSerendipityShapeFunctions:
         self.LOBATTO_COORDS = wp.constant(NodeVec(lobatto_coords))
         self.LOBATTO_WEIGHT = wp.constant(NodeVec(lobatto_weight))
         self.LAGRANGE_SCALE = wp.constant(NodeVec(lagrange_scale))
+        self.ORDER_PLUS_ONE = wp.constant(self.ORDER + 1)

         self.node_type_and_type_index = self._get_node_type_and_type_index()
         self._node_lobatto_indices = self._get_node_lobatto_indices()
@@ -328,6 +338,7 @@ class SquareSerendipityShapeFunctions:

     def make_element_inner_weight(self):
         ORDER = self.ORDER
+        ORDER_PLUS_ONE = self.ORDER_PLUS_ONE

         LOBATTO_COORDS = self.LOBATTO_COORDS
         LAGRANGE_SCALE = self.LAGRANGE_SCALE
@@ -361,7 +372,7 @@ class SquareSerendipityShapeFunctions:
             if node_type == SquareSerendipityShapeFunctions.EDGE_Y:
                 w *= wp.select(node_i == 0, coords[0], 1.0 - coords[0])
             else:
-                for k in range(ORDER + 1):
+                for k in range(ORDER_PLUS_ONE):
                     if k != node_i:
                         w *= coords[0] - LOBATTO_COORDS[k]

@@ -370,7 +381,7 @@ class SquareSerendipityShapeFunctions:
             if node_type == SquareSerendipityShapeFunctions.EDGE_X:
                 w *= wp.select(node_j == 0, coords[1], 1.0 - coords[1])
             else:
-                for k in range(ORDER + 1):
+                for k in range(ORDER_PLUS_ONE):
                     if k != node_j:
                         w *= coords[1] - LOBATTO_COORDS[k]
                 w *= LAGRANGE_SCALE[node_j]
@@ -381,6 +392,7 @@ class SquareSerendipityShapeFunctions:

     def make_element_inner_weight_gradient(self):
         ORDER = self.ORDER
+        ORDER_PLUS_ONE = self.ORDER_PLUS_ONE
         LOBATTO_COORDS = self.LOBATTO_COORDS
         LAGRANGE_SCALE = self.LAGRANGE_SCALE

@@ -424,7 +436,7 @@ class SquareSerendipityShapeFunctions:
                 prefix_x = wp.select(node_j == 0, coords[1], 1.0 - coords[1])
             else:
                 prefix_x = LAGRANGE_SCALE[node_j]
-                for k in range(ORDER + 1):
+                for k in range(ORDER_PLUS_ONE):
                     if k != node_j:
                         prefix_x *= coords[1] - LOBATTO_COORDS[k]

@@ -432,7 +444,7 @@ class SquareSerendipityShapeFunctions:
                 prefix_y = wp.select(node_i == 0, coords[0], 1.0 - coords[0])
             else:
                 prefix_y = LAGRANGE_SCALE[node_i]
-                for k in range(ORDER + 1):
+                for k in range(ORDER_PLUS_ONE):
                     if k != node_i:
                         prefix_y *= coords[0] - LOBATTO_COORDS[k]

@@ -441,7 +453,7 @@ class SquareSerendipityShapeFunctions:
             else:
                 prefix_y *= LAGRANGE_SCALE[node_j]
             grad_y = float(0.0)
-            for k in range(ORDER + 1):
+            for k in range(ORDER_PLUS_ONE):
                 if k != node_j:
                     delta_y = coords[1] - LOBATTO_COORDS[k]
                     grad_y = grad_y * delta_y + prefix_y
@@ -452,7 +464,7 @@ class SquareSerendipityShapeFunctions:
             else:
                 prefix_x *= LAGRANGE_SCALE[node_i]
             grad_x = float(0.0)
-            for k in range(ORDER + 1):
+            for k in range(ORDER_PLUS_ONE):
                 if k != node_i:
                     delta_x = coords[0] - LOBATTO_COORDS[k]
                     grad_x = grad_x * delta_x + prefix_x
@@ -530,7 +542,6 @@ class SquareNonConformingPolynomialShapeFunctions:
         NODES_PER_ELEMENT = self.NODES_PER_ELEMENT

         if self.ORDER == 2:
-
             # Intrinsic quadrature (order 2)
             @cache.dynamic_func(suffix=self.name)
             def node_quadrature_weight_quadratic(
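
The recurring change in this file hoists the ORDER + 1 expression into a wp.constant (ORDER_PLUS_ONE) computed once when the shape function is constructed and then referenced inside the dynamically generated functions. A minimal sketch of the general pattern, using a hypothetical kernel that is not part of the package:

    import warp as wp

    wp.init()

    ORDER = 3
    ORDER_PLUS_ONE = wp.constant(ORDER + 1)  # evaluated once in Python, baked into generated code

    @wp.kernel
    def poly_eval(x: wp.array(dtype=float), out: wp.array(dtype=float)):
        tid = wp.tid()
        w = float(1.0)
        for k in range(ORDER_PLUS_ONE):  # fixed trip count known at code-generation time
            w *= x[tid] - float(k)
        out[tid] = w

    # usage: wp.launch(poly_eval, dim=n, inputs=[x, out])
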
warp/fem/space/tetmesh_function_space.py CHANGED
@@ -5,7 +5,7 @@ from warp.fem.geometry import Tetmesh
 from warp.fem import cache

 from .topology import SpaceTopology, DiscontinuousSpaceTopologyMixin, forward_base_topology
-from .basis_space import BasisSpace, TraceBasisSpace
+from .basis_space import ShapeBasisSpace, TraceBasisSpace

 from .shape import ShapeFunction, ConstantShapeFunction
 from .shape import TetrahedronPolynomialShapeFunctions, TetrahedronNonConformingPolynomialShapeFunctions
@@ -136,7 +136,7 @@ class TetmeshDiscontinuousSpaceTopology(
         super().__init__(mesh, shape.NODES_PER_ELEMENT)


-class TetmeshBasisSpace(BasisSpace):
+class TetmeshBasisSpace(ShapeBasisSpace):
     def __init__(self, topology: TetmeshSpaceTopology, shape: ShapeFunction):
         super().__init__(topology, shape)

warp/fem/space/topology.py CHANGED
@@ -227,6 +227,10 @@ class DiscontinuousSpaceTopologyMixin:
     def node_count(self):
         return self.geometry.cell_count() * self.NODES_PER_ELEMENT

+    @property
+    def name(self):
+        return f"{self.geometry.name}_D{self.NODES_PER_ELEMENT}"
+
     def _make_element_node_index(self):
         NODES_PER_ELEMENT = self.NODES_PER_ELEMENT

@@ -242,6 +246,12 @@
         return element_node_index


+class DiscontinuousSpaceTopology(DiscontinuousSpaceTopologyMixin, SpaceTopology):
+    """Topology for generic discontinuous spaces"""
+
+    pass
+
+
 class DeformedGeometrySpaceTopology(SpaceTopology):
     def __init__(self, geometry: DeformedGeometry, base_topology: SpaceTopology):
         super().__init__(geometry, base_topology.NODES_PER_ELEMENT)
warp/fem/space/trimesh_2d_function_space.py CHANGED
@@ -5,7 +5,7 @@ from warp.fem.geometry import Trimesh2D
 from warp.fem import cache

 from .topology import SpaceTopology, DiscontinuousSpaceTopologyMixin, forward_base_topology
-from .basis_space import BasisSpace, TraceBasisSpace
+from .basis_space import ShapeBasisSpace, TraceBasisSpace

 from .shape import ShapeFunction, ConstantShapeFunction
 from .shape import Triangle2DPolynomialShapeFunctions, Triangle2DNonConformingPolynomialShapeFunctions
@@ -101,7 +101,7 @@ class Trimesh2DDiscontinuousSpaceTopology(
         super().__init__(mesh, shape.NODES_PER_ELEMENT)


-class Trimesh2DBasisSpace(BasisSpace):
+class Trimesh2DBasisSpace(ShapeBasisSpace):
     def __init__(self, topology: Trimesh2DSpaceTopology, shape: ShapeFunction):
         super().__init__(topology, shape)

warp/fem/utils.py CHANGED
@@ -1,10 +1,15 @@
 from typing import Any, Tuple

-import warp as wp
 import numpy as np

-from warp.utils import radix_sort_pairs, runlength_encode, array_scan
-from warp.fem.cache import borrow_temporary, borrow_temporary_like, TemporaryStore, Temporary
+import warp as wp
+from warp.fem.cache import (
+    Temporary,
+    TemporaryStore,
+    borrow_temporary,
+    borrow_temporary_like,
+)
+from warp.utils import array_scan, radix_sort_pairs, runlength_encode


 @wp.func
@@ -168,7 +173,7 @@ def compress_node_indices(
     if node_indices.device.is_cuda:
         unique_node_count_host = borrow_temporary(temporary_store, shape=(1,), dtype=int, pinned=True, device="cpu")
         wp.copy(src=unique_node_count_dev.array, dest=unique_node_count_host.array, count=1)
-        wp.synchronize_stream(wp.get_stream())
+        wp.synchronize_stream(wp.get_stream(node_indices.device))
         unique_node_count_dev.release()
         unique_node_count = int(unique_node_count_host.array.numpy()[0])
         unique_node_count_host.release()
@@ -217,7 +222,7 @@ def masked_indices(
     if offsets.device.is_cuda:
         masked_count_temp = borrow_temporary(temporary_store, shape=1, dtype=int, pinned=True, device="cpu")
         wp.copy(dest=masked_count_temp.array, src=offsets, src_offset=offsets.shape[0] - 1, count=1)
-        wp.synchronize_stream(wp.get_stream())
+        wp.synchronize_stream(wp.get_stream(offsets.device))
         masked_count = int(masked_count_temp.array.numpy()[0])
         masked_count_temp.release()
     else:
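
The wp.get_stream() → wp.get_stream(node_indices.device) / wp.get_stream(offsets.device) changes synchronize the stream of the device that owns the source array, rather than the current default device, before the pinned host buffer is read. A minimal sketch of the same readback pattern, assuming src lives on a CUDA device (read_back_scalar is a hypothetical helper, not part of the package):

    import warp as wp

    wp.init()

    def read_back_scalar(src: wp.array) -> int:
        # copy one element into pinned host memory, then wait on the stream of the
        # source device before reading the value on the CPU
        host = wp.empty(shape=(1,), dtype=int, device="cpu", pinned=True)
        wp.copy(dest=host, src=src, count=1)
        wp.synchronize_stream(wp.get_stream(src.device))
        return int(host.numpy()[0])
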
warp/native/array.h CHANGED
@@ -951,23 +951,64 @@ inline CUDA_CALLABLE void adj_atomic_sub(const A1<T>& buf, int i, int j, int k,
 template<template<typename> class A1, template<typename> class A2, typename T>
 inline CUDA_CALLABLE void adj_atomic_sub(const A1<T>& buf, int i, int j, int k, int l, T value, const A2<T>& adj_buf, int& adj_i, int& adj_j, int& adj_k, int& adj_l, T& adj_value, const T& adj_ret) {}

+// generic handler for scalar values
 template<template<typename> class A1, template<typename> class A2, typename T>
-inline CUDA_CALLABLE void adj_atomic_min(const A1<T>& buf, int i, T value, const A2<T>& adj_buf, int& adj_i, T& adj_value, const T& adj_ret) {}
+inline CUDA_CALLABLE void adj_atomic_min(const A1<T>& buf, int i, T value, const A2<T>& adj_buf, int& adj_i, T& adj_value, const T& adj_ret) {
+    if (buf.grad)
+        adj_atomic_minmax(&index(buf, i), &index_grad(buf, i), value, adj_value);
+
+    FP_VERIFY_ADJ_1(value, adj_value)
+}
 template<template<typename> class A1, template<typename> class A2, typename T>
-inline CUDA_CALLABLE void adj_atomic_min(const A1<T>& buf, int i, int j, T value, const A2<T>& adj_buf, int& adj_i, int& adj_j, T& adj_value, const T& adj_ret) {}
+inline CUDA_CALLABLE void adj_atomic_min(const A1<T>& buf, int i, int j, T value, const A2<T>& adj_buf, int& adj_i, int& adj_j, T& adj_value, const T& adj_ret) {
+    if (buf.grad)
+        adj_atomic_minmax(&index(buf, i, j), &index_grad(buf, i, j), value, adj_value);
+
+    FP_VERIFY_ADJ_2(value, adj_value)
+}
 template<template<typename> class A1, template<typename> class A2, typename T>
-inline CUDA_CALLABLE void adj_atomic_min(const A1<T>& buf, int i, int j, int k, T value, const A2<T>& adj_buf, int& adj_i, int& adj_j, int& adj_k, T& adj_value, const T& adj_ret) {}
+inline CUDA_CALLABLE void adj_atomic_min(const A1<T>& buf, int i, int j, int k, T value, const A2<T>& adj_buf, int& adj_i, int& adj_j, int& adj_k, T& adj_value, const T& adj_ret) {
+    if (buf.grad)
+        adj_atomic_minmax(&index(buf, i, j, k), &index_grad(buf, i, j, k), value, adj_value);
+
+    FP_VERIFY_ADJ_3(value, adj_value)
+}
 template<template<typename> class A1, template<typename> class A2, typename T>
-inline CUDA_CALLABLE void adj_atomic_min(const A1<T>& buf, int i, int j, int k, int l, T value, const A2<T>& adj_buf, int& adj_i, int& adj_j, int& adj_k, int& adj_l, T& adj_value, const T& adj_ret) {}
+inline CUDA_CALLABLE void adj_atomic_min(const A1<T>& buf, int i, int j, int k, int l, T value, const A2<T>& adj_buf, int& adj_i, int& adj_j, int& adj_k, int& adj_l, T& adj_value, const T& adj_ret) {
+    if (buf.grad)
+        adj_atomic_minmax(&index(buf, i, j, k, l), &index_grad(buf, i, j, k, l), value, adj_value);
+
+    FP_VERIFY_ADJ_4(value, adj_value)
+}

 template<template<typename> class A1, template<typename> class A2, typename T>
-inline CUDA_CALLABLE void adj_atomic_max(const A1<T>& buf, int i, T value, const A2<T>& adj_buf, int& adj_i, T& adj_value, const T& adj_ret) {}
+inline CUDA_CALLABLE void adj_atomic_max(const A1<T>& buf, int i, T value, const A2<T>& adj_buf, int& adj_i, T& adj_value, const T& adj_ret) {
+    if (buf.grad)
+        adj_atomic_minmax(&index(buf, i), &index_grad(buf, i), value, adj_value);
+
+    FP_VERIFY_ADJ_1(value, adj_value)
+}
 template<template<typename> class A1, template<typename> class A2, typename T>
-inline CUDA_CALLABLE void adj_atomic_max(const A1<T>& buf, int i, int j, T value, const A2<T>& adj_buf, int& adj_i, int& adj_j, T& adj_value, const T& adj_ret) {}
+inline CUDA_CALLABLE void adj_atomic_max(const A1<T>& buf, int i, int j, T value, const A2<T>& adj_buf, int& adj_i, int& adj_j, T& adj_value, const T& adj_ret) {
+    if (buf.grad)
+        adj_atomic_minmax(&index(buf, i, j), &index_grad(buf, i, j), value, adj_value);
+
+    FP_VERIFY_ADJ_2(value, adj_value)
+}
 template<template<typename> class A1, template<typename> class A2, typename T>
-inline CUDA_CALLABLE void adj_atomic_max(const A1<T>& buf, int i, int j, int k, T value, const A2<T>& adj_buf, int& adj_i, int& adj_j, int& adj_k, T& adj_value, const T& adj_ret) {}
+inline CUDA_CALLABLE void adj_atomic_max(const A1<T>& buf, int i, int j, int k, T value, const A2<T>& adj_buf, int& adj_i, int& adj_j, int& adj_k, T& adj_value, const T& adj_ret) {
+    if (buf.grad)
+        adj_atomic_minmax(&index(buf, i, j, k), &index_grad(buf, i, j, k), value, adj_value);
+
+    FP_VERIFY_ADJ_3(value, adj_value)
+}
 template<template<typename> class A1, template<typename> class A2, typename T>
-inline CUDA_CALLABLE void adj_atomic_max(const A1<T>& buf, int i, int j, int k, int l, T value, const A2<T>& adj_buf, int& adj_i, int& adj_j, int& adj_k, int& adj_l, T& adj_value, const T& adj_ret) {}
+inline CUDA_CALLABLE void adj_atomic_max(const A1<T>& buf, int i, int j, int k, int l, T value, const A2<T>& adj_buf, int& adj_i, int& adj_j, int& adj_k, int& adj_l, T& adj_value, const T& adj_ret) {
+    if (buf.grad)
+        adj_atomic_minmax(&index(buf, i, j, k, l), &index_grad(buf, i, j, k, l), value, adj_value);
+
+    FP_VERIFY_ADJ_4(value, adj_value)
+}

 } // namespace wp

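
With the adjoints above no longer empty, wp.atomic_min / wp.atomic_max propagate gradients back to the inputs whose value matches the stored min/max (see the adj_atomic_minmax helper added in builtin.h below). A small usage sketch with a hypothetical kernel and data, assuming that behavior:

    import numpy as np
    import warp as wp

    wp.init()

    @wp.kernel
    def min_reduce(values: wp.array(dtype=float), result: wp.array(dtype=float)):
        tid = wp.tid()
        wp.atomic_min(result, 0, values[tid])

    values = wp.array(np.array([3.0, 1.0, 2.0], dtype=np.float32), requires_grad=True)
    result = wp.array(np.array([1.0e9], dtype=np.float32), requires_grad=True)

    tape = wp.Tape()
    with tape:
        wp.launch(min_reduce, dim=3, inputs=[values, result])

    # seed the output gradient and run the adjoint pass
    tape.backward(grads={result: wp.array(np.array([1.0], dtype=np.float32))})
    print(values.grad.numpy())  # gradient lands on the entries equal to the minimum
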
warp/native/builtin.h CHANGED
@@ -295,7 +295,7 @@ inline CUDA_CALLABLE T rshift(T a, T b) { return a>>b; } \
 inline CUDA_CALLABLE T invert(T x) { return ~x; } \
 inline CUDA_CALLABLE bool isfinite(T x) { return true; } \
 inline CUDA_CALLABLE void adj_mul(T a, T b, T& adj_a, T& adj_b, T adj_ret) { } \
-inline CUDA_CALLABLE void adj_div(T a, T b, T& adj_a, T& adj_b, T adj_ret) { } \
+inline CUDA_CALLABLE void adj_div(T a, T b, T ret, T& adj_a, T& adj_b, T adj_ret) { } \
 inline CUDA_CALLABLE void adj_add(T a, T b, T& adj_a, T& adj_b, T adj_ret) { } \
 inline CUDA_CALLABLE void adj_sub(T a, T b, T& adj_a, T& adj_b, T adj_ret) { } \
 inline CUDA_CALLABLE void adj_mod(T a, T b, T& adj_a, T& adj_b, T adj_ret) { } \
@@ -443,10 +443,10 @@ inline CUDA_CALLABLE T div(T a, T b)\
     })\
     return a/b;\
 }\
-inline CUDA_CALLABLE void adj_div(T a, T b, T& adj_a, T& adj_b, T adj_ret)\
+inline CUDA_CALLABLE void adj_div(T a, T b, T ret, T& adj_a, T& adj_b, T adj_ret)\
 {\
     adj_a += adj_ret/b;\
-    adj_b -= adj_ret*(a/b)/b;\
+    adj_b -= adj_ret*(ret)/b;\
     DO_IF_FPCHECK(\
     if (!isfinite(adj_a) || !isfinite(adj_b))\
     {\
@@ -859,11 +859,11 @@ inline CUDA_CALLABLE void adj_log10(T a, T& adj_a, T adj_ret)\
     assert(0);\
     })\
 }\
-inline CUDA_CALLABLE void adj_exp(T a, T& adj_a, T adj_ret) { adj_a += exp(a)*adj_ret; }\
-inline CUDA_CALLABLE void adj_pow(T a, T b, T& adj_a, T& adj_b, T adj_ret)\
+inline CUDA_CALLABLE void adj_exp(T a, T ret, T& adj_a, T adj_ret) { adj_a += ret*adj_ret; }\
+inline CUDA_CALLABLE void adj_pow(T a, T b, T ret, T& adj_a, T& adj_b, T adj_ret)\
 { \
     adj_a += b*pow(a, b-T(1))*adj_ret;\
-    adj_b += log(a)*pow(a, b)*adj_ret;\
+    adj_b += log(a)*ret*adj_ret;\
     DO_IF_FPCHECK(if (!isfinite(adj_a) || !isfinite(adj_b))\
     {\
     printf("%s:%d - adj_pow(%f, %f, %f, %f, %f)\n", __FILE__, __LINE__, float(a), float(b), float(adj_a), float(adj_b), float(adj_ret));\
@@ -962,24 +962,22 @@ inline CUDA_CALLABLE void adj_cosh(T x, T& adj_x, T adj_ret)\
 {\
     adj_x += sinh(x)*adj_ret;\
 }\
-inline CUDA_CALLABLE void adj_tanh(T x, T& adj_x, T adj_ret)\
+inline CUDA_CALLABLE void adj_tanh(T x, T ret, T& adj_x, T adj_ret)\
 {\
-    T tanh_x = tanh(x);\
-    adj_x += (T(1) - tanh_x*tanh_x)*adj_ret;\
+    adj_x += (T(1) - ret*ret)*adj_ret;\
 }\
-inline CUDA_CALLABLE void adj_sqrt(T x, T& adj_x, T adj_ret)\
+inline CUDA_CALLABLE void adj_sqrt(T x, T ret, T& adj_x, T adj_ret)\
 {\
-    adj_x += T(0.5)*(T(1)/sqrt(x))*adj_ret;\
+    adj_x += T(0.5)*(T(1)/ret)*adj_ret;\
     DO_IF_FPCHECK(if (!isfinite(adj_x))\
     {\
     printf("%s:%d - adj_sqrt(%f, %f, %f)\n", __FILE__, __LINE__, float(x), float(adj_x), float(adj_ret));\
     assert(0);\
     })\
 }\
-inline CUDA_CALLABLE void adj_cbrt(T x, T& adj_x, T adj_ret)\
+inline CUDA_CALLABLE void adj_cbrt(T x, T ret, T& adj_x, T adj_ret)\
 {\
-    T cbrt_x = cbrt(x);\
-    adj_x += (T(1)/T(3))*(T(1)/(cbrt_x*cbrt_x))*adj_ret;\
+    adj_x += (T(1)/T(3))*(T(1)/(ret*ret))*adj_ret;\
     DO_IF_FPCHECK(if (!isfinite(adj_x))\
     {\
     printf("%s:%d - adj_cbrt(%f, %f, %f)\n", __FILE__, __LINE__, float(x), float(adj_x), float(adj_ret));\
@@ -1273,6 +1271,25 @@ inline CUDA_CALLABLE int atomic_min(int* address, int val)
 #endif
 }

+// default behavior for adjoint of atomic min/max operation that accumulates gradients for all elements matching the min/max value
+template <typename T>
+CUDA_CALLABLE inline void adj_atomic_minmax(T *addr, T *adj_addr, const T &value, T &adj_value)
+{
+    if (value == *addr)
+        adj_value += *adj_addr;
+}
+
+// for integral types we do not accumulate gradients
+CUDA_CALLABLE inline void adj_atomic_minmax(int8* buf, int8* adj_buf, const int8 &value, int8 &adj_value) { }
+CUDA_CALLABLE inline void adj_atomic_minmax(uint8* buf, uint8* adj_buf, const uint8 &value, uint8 &adj_value) { }
+CUDA_CALLABLE inline void adj_atomic_minmax(int16* buf, int16* adj_buf, const int16 &value, int16 &adj_value) { }
+CUDA_CALLABLE inline void adj_atomic_minmax(uint16* buf, uint16* adj_buf, const uint16 &value, uint16 &adj_value) { }
+CUDA_CALLABLE inline void adj_atomic_minmax(int32* buf, int32* adj_buf, const int32 &value, int32 &adj_value) { }
+CUDA_CALLABLE inline void adj_atomic_minmax(uint32* buf, uint32* adj_buf, const uint32 &value, uint32 &adj_value) { }
+CUDA_CALLABLE inline void adj_atomic_minmax(int64* buf, int64* adj_buf, const int64 &value, int64 &adj_value) { }
+CUDA_CALLABLE inline void adj_atomic_minmax(uint64* buf, uint64* adj_buf, const uint64 &value, uint64 &adj_value) { }
+CUDA_CALLABLE inline void adj_atomic_minmax(bool* buf, bool* adj_buf, const bool &value, bool &adj_value) { }
+

 } // namespace wp

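
A pattern in these hunks: adjoint functions such as adj_div, adj_exp, adj_tanh, adj_sqrt and adj_cbrt gain an extra ret parameter carrying the forward result, so the backward pass reuses it instead of recomputing the primal expression. For division this relies on d(a/b)/db = -a/b^2 = -(a/b)/b, so the saved ret = a/b substitutes directly. A quick plain-Python sanity check of that identity (illustrative only, not part of the package):

    a, b = 3.0, 4.0
    ret = a / b                                           # forward value saved by the code generator
    eps = 1.0e-6
    fd = (a / (b + eps) - a / (b - eps)) / (2.0 * eps)    # finite-difference estimate of d(a/b)/db
    assert abs(fd - (-ret / b)) < 1.0e-5                  # matches adj_b -= adj_ret*(ret)/b with adj_ret = 1
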
warp/native/cuda_util.cpp CHANGED
@@ -89,6 +89,7 @@ static PFN_cuGraphicsResourceGetMappedPointer_v3020 pfn_cuGraphicsResourceGetMap
 static PFN_cuGraphicsGLRegisterBuffer_v3000 pfn_cuGraphicsGLRegisterBuffer;
 static PFN_cuGraphicsUnregisterResource_v3000 pfn_cuGraphicsUnregisterResource;

+static bool cuda_driver_initialized = false;

 bool ContextGuard::always_restore = false;

@@ -196,11 +197,15 @@ bool init_cuda_driver()
     get_driver_entry_point("cuGraphicsUnregisterResource", &(void*&)pfn_cuGraphicsUnregisterResource);

     if (pfn_cuInit)
-        return check_cu(pfn_cuInit(0));
-    else
-        return false;
+        cuda_driver_initialized = check_cu(pfn_cuInit(0));
+
+    return cuda_driver_initialized;
 }

+bool is_cuda_driver_initialized()
+{
+    return cuda_driver_initialized;
+}

 bool check_cuda_result(cudaError_t code, const char* file, int line)
 {
warp/native/cuda_util.h CHANGED
@@ -83,6 +83,7 @@ CUresult cuGraphicsUnregisterResource_f(CUgraphicsResource resource);


 bool init_cuda_driver();
+bool is_cuda_driver_initialized();

 bool check_cuda_result(cudaError_t code, const char* file, int line);
 inline bool check_cuda_result(uint64_t code, const char* file, int line)