PyPI - warp-lang - Versions diffs - 1.0.0b2__py3-none-win_amd64.whl → 1.0.0b6__py3-none-win_amd64.whl - Mend

warp-lang 1.0.0b2__py3-none-win_amd64.whl → 1.0.0b6__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (271)

docs/conf.py +17 -5
examples/env/env_ant.py +1 -1
examples/env/env_cartpole.py +1 -1
examples/env/env_humanoid.py +1 -1
examples/env/env_usd.py +4 -1
examples/env/environment.py +8 -9
examples/example_dem.py +34 -33
examples/example_diffray.py +364 -337
examples/example_fluid.py +32 -23
examples/example_jacobian_ik.py +97 -93
examples/example_marching_cubes.py +6 -16
examples/example_mesh.py +6 -16
examples/example_mesh_intersect.py +16 -14
examples/example_nvdb.py +14 -16
examples/example_raycast.py +14 -13
examples/example_raymarch.py +16 -23
examples/example_render_opengl.py +19 -10
examples/example_sim_cartpole.py +82 -78
examples/example_sim_cloth.py +45 -48
examples/example_sim_fk_grad.py +51 -44
examples/example_sim_fk_grad_torch.py +47 -40
examples/example_sim_grad_bounce.py +108 -133
examples/example_sim_grad_cloth.py +99 -113
examples/example_sim_granular.py +5 -6
examples/{example_sim_sdf_shape.py → example_sim_granular_collision_sdf.py} +37 -26
examples/example_sim_neo_hookean.py +51 -55
examples/example_sim_particle_chain.py +4 -4
examples/example_sim_quadruped.py +126 -81
examples/example_sim_rigid_chain.py +54 -61
examples/example_sim_rigid_contact.py +66 -70
examples/example_sim_rigid_fem.py +3 -3
examples/example_sim_rigid_force.py +1 -1
examples/example_sim_rigid_gyroscopic.py +3 -4
examples/example_sim_rigid_kinematics.py +28 -39
examples/example_sim_trajopt.py +112 -110
examples/example_sph.py +9 -8
examples/example_wave.py +7 -7
examples/fem/bsr_utils.py +30 -17
examples/fem/example_apic_fluid.py +85 -69
examples/fem/example_convection_diffusion.py +97 -93
examples/fem/example_convection_diffusion_dg.py +142 -149
examples/fem/example_convection_diffusion_dg0.py +141 -136
examples/fem/example_deformed_geometry.py +146 -0
examples/fem/example_diffusion.py +115 -84
examples/fem/example_diffusion_3d.py +116 -86
examples/fem/example_diffusion_mgpu.py +102 -79
examples/fem/example_mixed_elasticity.py +139 -100
examples/fem/example_navier_stokes.py +175 -162
examples/fem/example_stokes.py +143 -111
examples/fem/example_stokes_transfer.py +186 -157
examples/fem/mesh_utils.py +59 -97
examples/fem/plot_utils.py +138 -17
tools/ci/publishing/build_nodes_info.py +54 -0
warp/__init__.py +4 -3
warp/__init__.pyi +1 -0
warp/bin/warp-clang.dll +0 -0
warp/bin/warp.dll +0 -0
warp/build.py +5 -3
warp/build_dll.py +29 -9
warp/builtins.py +836 -492
warp/codegen.py +864 -553
warp/config.py +3 -1
warp/context.py +389 -172
warp/fem/__init__.py +24 -6
warp/fem/cache.py +318 -25
warp/fem/dirichlet.py +7 -3
warp/fem/domain.py +14 -0
warp/fem/field/__init__.py +30 -38
warp/fem/field/field.py +149 -0
warp/fem/field/nodal_field.py +244 -138
warp/fem/field/restriction.py +8 -6
warp/fem/field/test.py +127 -59
warp/fem/field/trial.py +117 -60
warp/fem/geometry/__init__.py +5 -1
warp/fem/geometry/deformed_geometry.py +271 -0
warp/fem/geometry/element.py +24 -1
warp/fem/geometry/geometry.py +86 -14
warp/fem/geometry/grid_2d.py +112 -54
warp/fem/geometry/grid_3d.py +134 -65
warp/fem/geometry/hexmesh.py +953 -0
warp/fem/geometry/partition.py +85 -33
warp/fem/geometry/quadmesh_2d.py +532 -0
warp/fem/geometry/tetmesh.py +451 -115
warp/fem/geometry/trimesh_2d.py +197 -92
warp/fem/integrate.py +534 -268
warp/fem/operator.py +58 -31
warp/fem/polynomial.py +11 -0
warp/fem/quadrature/__init__.py +1 -1
warp/fem/quadrature/pic_quadrature.py +150 -58
warp/fem/quadrature/quadrature.py +209 -57
warp/fem/space/__init__.py +230 -53
warp/fem/space/basis_space.py +489 -0
warp/fem/space/collocated_function_space.py +105 -0
warp/fem/space/dof_mapper.py +49 -2
warp/fem/space/function_space.py +90 -39
warp/fem/space/grid_2d_function_space.py +149 -496
warp/fem/space/grid_3d_function_space.py +173 -538
warp/fem/space/hexmesh_function_space.py +352 -0
warp/fem/space/partition.py +129 -76
warp/fem/space/quadmesh_2d_function_space.py +369 -0
warp/fem/space/restriction.py +46 -34
warp/fem/space/shape/__init__.py +15 -0
warp/fem/space/shape/cube_shape_function.py +738 -0
warp/fem/space/shape/shape_function.py +103 -0
warp/fem/space/shape/square_shape_function.py +611 -0
warp/fem/space/shape/tet_shape_function.py +567 -0
warp/fem/space/shape/triangle_shape_function.py +429 -0
warp/fem/space/tetmesh_function_space.py +132 -1039
warp/fem/space/topology.py +295 -0
warp/fem/space/trimesh_2d_function_space.py +104 -742
warp/fem/types.py +13 -11
warp/fem/utils.py +335 -60
warp/native/array.h +120 -34
warp/native/builtin.h +101 -72
warp/native/bvh.cpp +73 -325
warp/native/bvh.cu +406 -23
warp/native/bvh.h +22 -40
warp/native/clang/clang.cpp +1 -0
warp/native/crt.h +2 -0
warp/native/cuda_util.cpp +8 -3
warp/native/cuda_util.h +1 -0
warp/native/exports.h +1522 -1243
warp/native/intersect.h +19 -4
warp/native/intersect_adj.h +8 -8
warp/native/mat.h +76 -17
warp/native/mesh.cpp +33 -108
warp/native/mesh.cu +114 -18
warp/native/mesh.h +395 -40
warp/native/noise.h +272 -329
warp/native/quat.h +51 -8
warp/native/rand.h +44 -34
warp/native/reduce.cpp +1 -1
warp/native/sparse.cpp +4 -4
warp/native/sparse.cu +163 -155
warp/native/spatial.h +2 -2
warp/native/temp_buffer.h +18 -14
warp/native/vec.h +103 -21
warp/native/warp.cpp +2 -1
warp/native/warp.cu +28 -3
warp/native/warp.h +4 -3
warp/render/render_opengl.py +261 -109
warp/sim/__init__.py +1 -2
warp/sim/articulation.py +385 -185
warp/sim/import_mjcf.py +59 -48
warp/sim/import_urdf.py +15 -15
warp/sim/import_usd.py +174 -102
warp/sim/inertia.py +17 -18
warp/sim/integrator_xpbd.py +4 -3
warp/sim/model.py +330 -250
warp/sim/render.py +1 -1
warp/sparse.py +625 -152
warp/stubs.py +341 -309
warp/tape.py +9 -6
warp/tests/__main__.py +3 -6
warp/tests/assets/curlnoise_golden.npy +0 -0
warp/tests/assets/pnoise_golden.npy +0 -0
warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
warp/tests/aux_test_unresolved_func.py +14 -0
warp/tests/aux_test_unresolved_symbol.py +14 -0
warp/tests/disabled_kinematics.py +239 -0
warp/tests/run_coverage_serial.py +31 -0
warp/tests/test_adam.py +103 -106
warp/tests/test_arithmetic.py +94 -74
warp/tests/test_array.py +82 -101
warp/tests/test_array_reduce.py +57 -23
warp/tests/test_atomic.py +64 -28
warp/tests/test_bool.py +22 -12
warp/tests/test_builtins_resolution.py +1292 -0
warp/tests/test_bvh.py +18 -18
warp/tests/test_closest_point_edge_edge.py +54 -57
warp/tests/test_codegen.py +165 -134
warp/tests/test_compile_consts.py +28 -20
warp/tests/test_conditional.py +108 -24
warp/tests/test_copy.py +10 -12
warp/tests/test_ctypes.py +112 -88
warp/tests/test_dense.py +21 -14
warp/tests/test_devices.py +98 -0
warp/tests/test_dlpack.py +75 -75
warp/tests/test_examples.py +237 -0
warp/tests/test_fabricarray.py +22 -24
warp/tests/test_fast_math.py +15 -11
warp/tests/test_fem.py +1034 -124
warp/tests/test_fp16.py +23 -16
warp/tests/test_func.py +187 -86
warp/tests/test_generics.py +194 -49
warp/tests/test_grad.py +123 -181
warp/tests/test_grad_customs.py +176 -0
warp/tests/test_hash_grid.py +35 -34
warp/tests/test_import.py +10 -23
warp/tests/test_indexedarray.py +24 -25
warp/tests/test_intersect.py +18 -9
warp/tests/test_large.py +141 -0
warp/tests/test_launch.py +14 -41
warp/tests/test_lerp.py +64 -65
warp/tests/test_lvalue.py +493 -0
warp/tests/test_marching_cubes.py +12 -13
warp/tests/test_mat.py +517 -2898
warp/tests/test_mat_lite.py +115 -0
warp/tests/test_mat_scalar_ops.py +2889 -0
warp/tests/test_math.py +103 -9
warp/tests/test_matmul.py +304 -69
warp/tests/test_matmul_lite.py +410 -0
warp/tests/test_mesh.py +60 -22
warp/tests/test_mesh_query_aabb.py +21 -25
warp/tests/test_mesh_query_point.py +111 -22
warp/tests/test_mesh_query_ray.py +12 -24
warp/tests/test_mlp.py +30 -22
warp/tests/test_model.py +92 -89
warp/tests/test_modules_lite.py +39 -0
warp/tests/test_multigpu.py +88 -114
warp/tests/test_noise.py +12 -11
warp/tests/test_operators.py +16 -20
warp/tests/test_options.py +11 -11
warp/tests/test_pinned.py +17 -18
warp/tests/test_print.py +32 -11
warp/tests/test_quat.py +275 -129
warp/tests/test_rand.py +18 -16
warp/tests/test_reload.py +38 -34
warp/tests/test_rounding.py +50 -43
warp/tests/test_runlength_encode.py +168 -20
warp/tests/test_smoothstep.py +9 -11
warp/tests/test_snippet.py +143 -0
warp/tests/test_sparse.py +261 -63
warp/tests/test_spatial.py +276 -243
warp/tests/test_streams.py +110 -85
warp/tests/test_struct.py +268 -63
warp/tests/test_tape.py +39 -21
warp/tests/test_torch.py +90 -86
warp/tests/test_transient_module.py +10 -12
warp/tests/test_types.py +363 -0
warp/tests/test_utils.py +451 -0
warp/tests/test_vec.py +354 -2050
warp/tests/test_vec_lite.py +73 -0
warp/tests/test_vec_scalar_ops.py +2099 -0
warp/tests/test_volume.py +418 -376
warp/tests/test_volume_write.py +124 -134
warp/tests/unittest_serial.py +35 -0
warp/tests/unittest_suites.py +291 -0
warp/tests/unittest_utils.py +342 -0
warp/tests/{test_misc.py → unused_test_misc.py} +13 -5
warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
warp/thirdparty/appdirs.py +36 -45
warp/thirdparty/unittest_parallel.py +589 -0
warp/types.py +622 -211
warp/utils.py +54 -393
warp_lang-1.0.0b6.dist-info/METADATA +238 -0
warp_lang-1.0.0b6.dist-info/RECORD +409 -0
{warp_lang-1.0.0b2.dist-info → warp_lang-1.0.0b6.dist-info}/WHEEL +1 -1
examples/example_cache_management.py +0 -40
examples/example_multigpu.py +0 -54
examples/example_struct.py +0 -65
examples/fem/example_stokes_transfer_3d.py +0 -210
warp/bin/warp-clang.so +0 -0
warp/bin/warp.so +0 -0
warp/fem/field/discrete_field.py +0 -80
warp/fem/space/nodal_function_space.py +0 -233
warp/tests/test_all.py +0 -223
warp/tests/test_array_scan.py +0 -60
warp/tests/test_base.py +0 -208
warp/tests/test_unresolved_func.py +0 -7
warp/tests/test_unresolved_symbol.py +0 -7
warp_lang-1.0.0b2.dist-info/METADATA +0 -26
warp_lang-1.0.0b2.dist-info/RECORD +0 -380
/warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
/warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
/warp/tests/{test_square.py → aux_test_square.py} +0 -0
{warp_lang-1.0.0b2.dist-info → warp_lang-1.0.0b6.dist-info}/LICENSE.md +0 -0
{warp_lang-1.0.0b2.dist-info → warp_lang-1.0.0b6.dist-info}/top_level.txt +0 -0

examples/example_diffray.py CHANGED

@@ -13,23 +13,21 @@
 #
 ##############################################################################
-import matplotlib.pyplot as plt
-import matplotlib.image as img
-import matplotlib.animation as animation
-from pxr import Usd, UsdGeom
+import math
+import os
-import warp as wp
 import numpy as np
+from pxr import Usd, UsdGeom
-import os
-import math
+import warp as wp
+from warp.optim import SGD
 wp.init()
 class RenderMode:
     """Rendering modes
-    grayscale: lambertian shading from multiple directional lights
+    grayscale: Lambertian shading from multiple directional lights
     texture: 2D texture map
     normal_map: mesh normal computed from interpolated vertex normals
     """
@@ -78,6 +76,208 @@ class DirectionalLights:
     num_lights: int
+@wp.kernel
+def vertex_normal_sum_kernel(
+    verts: wp.array(dtype=wp.vec3), indices: wp.array(dtype=int), normal_sums: wp.array(dtype=wp.vec3)
+):
+    tid = wp.tid()
+    i = indices[tid * 3]
+    j = indices[tid * 3 + 1]
+    k = indices[tid * 3 + 2]
+    a = verts[i]
+    b = verts[j]
+    c = verts[k]
+    ab = b - a
+    ac = c - a
+    area_normal = wp.cross(ab, ac)
+    wp.atomic_add(normal_sums, i, area_normal)
+    wp.atomic_add(normal_sums, j, area_normal)
+    wp.atomic_add(normal_sums, k, area_normal)
+@wp.kernel
+def normalize_kernel(
+    normal_sums: wp.array(dtype=wp.vec3),
+    vertex_normals: wp.array(dtype=wp.vec3),
+):
+    tid = wp.tid()
+    vertex_normals[tid] = wp.normalize(normal_sums[tid])
+@wp.func
+def texture_interpolation(tex_interp: wp.vec2, texture: wp.array2d(dtype=wp.vec3)):
+    tex_width = texture.shape[1]
+    tex_height = texture.shape[0]
+    tex = wp.vec2(tex_interp[0] * float(tex_width - 1), (1.0 - tex_interp[1]) * float(tex_height - 1))
+    x0 = int(tex[0])
+    x1 = x0 + 1
+    alpha_x = tex[0] - float(x0)
+    y0 = int(tex[1])
+    y1 = y0 + 1
+    alpha_y = tex[1] - float(y0)
+    c00 = texture[y0, x0]
+    c10 = texture[y0, x1]
+    c01 = texture[y1, x0]
+    c11 = texture[y1, x1]
+    lower = (1.0 - alpha_x) * c00 + alpha_x * c10
+    upper = (1.0 - alpha_x) * c01 + alpha_x * c11
+    color = (1.0 - alpha_y) * lower + alpha_y * upper
+    return color
+@wp.kernel
+def draw_kernel(
+    mesh: RenderMesh,
+    camera: Camera,
+    texture: wp.array2d(dtype=wp.vec3),
+    rays_width: int,
+    rays_height: int,
+    rays: wp.array(dtype=wp.vec3),
+    lights: DirectionalLights,
+    mode: int,
+):
+    tid = wp.tid()
+    x = tid % rays_width
+    y = rays_height - tid // rays_width
+    sx = 2.0 * float(x) / float(rays_width) - 1.0
+    sy = 2.0 * float(y) / float(rays_height) - 1.0
+    # compute view ray in world space
+    ro_world = camera.pos
+    rd_world = wp.normalize(wp.quat_rotate(camera.rot, wp.vec3(sx * camera.tan * camera.aspect, sy * camera.tan, -1.0)))
+    # compute view ray in mesh space
+    inv = wp.transform_inverse(wp.transform(mesh.pos[0], mesh.rot[0]))
+    ro = wp.transform_point(inv, ro_world)
+    rd = wp.transform_vector(inv, rd_world)
+    t = float(0.0)
+    ur = float(0.0)
+    vr = float(0.0)
+    sign = float(0.0)
+    n = wp.vec3()
+    f = int(0)
+    color = wp.vec3(0.0, 0.0, 0.0)
+    if wp.mesh_query_ray(mesh.id, ro, rd, 1.0e6, t, ur, vr, sign, n, f):
+        i = mesh.indices[f * 3]
+        j = mesh.indices[f * 3 + 1]
+        k = mesh.indices[f * 3 + 2]
+        a = mesh.vertices[i]
+        b = mesh.vertices[j]
+        c = mesh.vertices[k]
+        p = wp.mesh_eval_position(mesh.id, f, ur, vr)
+        # barycentric coordinates
+        tri_area = wp.length(wp.cross(b - a, c - a))
+        w = wp.length(wp.cross(b - a, p - a)) / tri_area
+        v = wp.length(wp.cross(p - a, c - a)) / tri_area
+        u = 1.0 - w - v
+        a_n = mesh.vertex_normals[i]
+        b_n = mesh.vertex_normals[j]
+        c_n = mesh.vertex_normals[k]
+        # vertex normal interpolation
+        normal = u * a_n + v * b_n + w * c_n
+        if mode == 0 or mode == 1:
+            if mode == 0:  # grayscale
+                color = wp.vec3(1.0)
+            elif mode == 1:  # texture interpolation
+                tex_a = mesh.tex_coords[mesh.tex_indices[f * 3]]
+                tex_b = mesh.tex_coords[mesh.tex_indices[f * 3 + 1]]
+                tex_c = mesh.tex_coords[mesh.tex_indices[f * 3 + 2]]
+                tex = u * tex_a + v * tex_b + w * tex_c
+                color = texture_interpolation(tex, texture)
+            # lambertian directional lighting
+            lambert = float(0.0)
+            for i in range(lights.num_lights):
+                dir = wp.transform_vector(inv, lights.dirs[i])
+                val = lights.intensities[i] * wp.dot(normal, dir)
+                if val < 0.0:
+                    val = 0.0
+                lambert = lambert + val
+            color = lambert * color
+        elif mode == 2:  # normal map
+            color = normal * 0.5 + wp.vec3(0.5, 0.5, 0.5)
+        if color[0] > 1.0:
+            color = wp.vec3(1.0, color[1], color[2])
+        if color[1] > 1.0:
+            color = wp.vec3(color[0], 1.0, color[2])
+        if color[2] > 1.0:
+            color = wp.vec3(color[0], color[1], 1.0)
+    rays[tid] = color
+@wp.kernel
+def downsample_kernel(
+    rays: wp.array(dtype=wp.vec3), pixels: wp.array(dtype=wp.vec3), rays_width: int, num_samples: int
+):
+    tid = wp.tid()
+    pixels_width = rays_width / num_samples
+    px = tid % pixels_width
+    py = tid // pixels_width
+    start_idx = py * num_samples * rays_width + px * num_samples
+    color = wp.vec3(0.0, 0.0, 0.0)
+    for i in range(0, num_samples):
+        for j in range(0, num_samples):
+            ray = rays[start_idx + i * rays_width + j]
+            color = wp.vec3(color[0] + ray[0], color[1] + ray[1], color[2] + ray[2])
+    num_samples_sq = float(num_samples * num_samples)
+    color = wp.vec3(color[0] / num_samples_sq, color[1] / num_samples_sq, color[2] / num_samples_sq)
+    pixels[tid] = color
+@wp.kernel
+def loss_kernel(pixels: wp.array(dtype=wp.vec3), target_pixels: wp.array(dtype=wp.vec3), loss: wp.array(dtype=float)):
+    tid = wp.tid()
+    pixel = pixels[tid]
+    target_pixel = target_pixels[tid]
+    diff = target_pixel - pixel
+    # pseudo Huber loss
+    delta = 1.0
+    x = delta * delta * (wp.sqrt(1.0 + (diff[0] / delta) * (diff[0] / delta)) - 1.0)
+    y = delta * delta * (wp.sqrt(1.0 + (diff[1] / delta) * (diff[1] / delta)) - 1.0)
+    z = delta * delta * (wp.sqrt(1.0 + (diff[2] / delta) * (diff[2] / delta)) - 1.0)
+    sum = x + y + z
+    wp.atomic_add(loss, 0, sum)
+@wp.kernel
+def normalize(x: wp.array(dtype=wp.quat)):
+    tid = wp.tid()
+    x[tid] = wp.normalize(x[tid])
 class Example:
     """A basic differentiable ray tracer
@@ -103,7 +303,10 @@ class Example:
     render_mesh.tex_coords: 2D texture coordinates
     """
-    def __init__(self):
+    def __init__(self, stage=None, rot_array=[0.0, 0.0, 0.0, 1.0], verbose=False):
+        self.device = wp.get_device()
+        self.verbose = verbose
         cam_pos = wp.vec3(0.0, 0.75, 7.0)
         cam_rot = wp.quat(0.0, 0.0, 0.0, 1.0)
         horizontal_aperture = 36.0
@@ -143,328 +346,126 @@ class Example:
         # set training iterations
         self.train_rate = 3.0e-8
-        self.train_iters = 300
+        self.train_rate = 5.00e-8
+        self.momentum = 0.5
+        self.dampening = 0.1
+        self.weight_decay = 0.0
+        self.train_iters = 150
         self.period = 10
+        self.iter = 0
         # storage for training animation
         self.images = np.zeros((self.height, self.width, 3, int(self.train_iters / self.period)))
+        self.image_counter = 0
-        with wp.ScopedDevice(device="cuda:0"):
-            # construct RenderMesh
-            self.render_mesh = RenderMesh()
-            self.mesh = wp.Mesh(
-                points=wp.array(points, dtype=wp.vec3, requires_grad=True), indices=wp.array(indices, dtype=int)
-            )
-            self.render_mesh.id = self.mesh.id
-            self.render_mesh.vertices = self.mesh.points
-            self.render_mesh.indices = self.mesh.indices
-            self.render_mesh.tex_coords = wp.array(tex_coords, dtype=wp.vec2, requires_grad=True)
-            self.render_mesh.tex_indices = wp.array(tex_indices, dtype=int)
-            self.normal_sums = wp.zeros(num_points, dtype=wp.vec3, requires_grad=True)
-            self.render_mesh.vertex_normals = wp.zeros(num_points, dtype=wp.vec3, requires_grad=True)
-            self.render_mesh.pos = wp.zeros(1, dtype=wp.vec3, requires_grad=True)
-            self.render_mesh.rot = wp.array(np.array([0.0, 0.0, 0.0, 1.0]), dtype=wp.quat, requires_grad=True)
-            # compute vertex normals
-            wp.launch(
-                kernel=Example.vertex_normal_sum_kernel,
-                dim=num_faces,
-                inputs=[self.render_mesh.vertices, self.render_mesh.indices, self.normal_sums],
-            )
-            wp.launch(
-                kernel=Example.normalize_kernel,
-                dim=num_points,
-                inputs=[self.normal_sums, self.render_mesh.vertex_normals],
-            )
-            # construct camera
-            self.camera = Camera()
-            self.camera.horizontal = horizontal_aperture
-            self.camera.vertical = vertical_aperture
-            self.camera.aspect = aspect
-            self.camera.e = focal_length
-            self.camera.tan = vertical_aperture / (2.0 * focal_length)
-            self.camera.pos = cam_pos
-            self.camera.rot = cam_rot
-            # construct texture
-            self.texture = wp.array2d(texture_host, dtype=wp.vec3, requires_grad=True)
-            # construct lights
-            self.lights = DirectionalLights()
-            self.lights.dirs = wp.array(np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]]), dtype=wp.vec3, requires_grad=True)
-            self.lights.intensities = wp.array(np.array([2.0, 0.2]), dtype=float, requires_grad=True)
-            self.lights.num_lights = 2
-            # construct rays
-            self.rays_width = self.width * pow(2, self.num_samples)
-            self.rays_height = self.height * pow(2, self.num_samples)
-            self.num_rays = self.rays_width * self.rays_height
-            self.rays = wp.zeros(self.num_rays, dtype=wp.vec3, requires_grad=True)
-            # construct pixels
-            self.pixels = wp.zeros(self.num_pixels, dtype=wp.vec3, requires_grad=True)
-            self.target_pixels = wp.zeros(self.num_pixels, dtype=wp.vec3)
-            # loss array
-            self.loss = wp.zeros(1, dtype=float, requires_grad=True)
-    def update(self):
-        pass
-    def render(self, is_live=False):
-        with wp.ScopedDevice("cuda:0"):
-            # raycast
-            wp.launch(
-                kernel=Example.draw_kernel,
-                dim=self.num_rays,
-                inputs=[
-                    self.render_mesh,
-                    self.camera,
-                    self.texture,
-                    self.rays_width,
-                    self.rays_height,
-                    self.rays,
-                    self.lights,
-                    self.render_mode,
-                ],
-            )
-            # downsample
-            wp.launch(
-                kernel=Example.downsample_kernel,
-                dim=self.num_pixels,
-                inputs=[self.rays, self.pixels, self.rays_width, pow(2, self.num_samples)],
-            )
-    @wp.kernel
-    def vertex_normal_sum_kernel(
-        verts: wp.array(dtype=wp.vec3), indices: wp.array(dtype=int), normal_sums: wp.array(dtype=wp.vec3)
-    ):
-        tid = wp.tid()
-        i = indices[tid * 3]
-        j = indices[tid * 3 + 1]
-        k = indices[tid * 3 + 2]
-        a = verts[i]
-        b = verts[j]
-        c = verts[k]
-        ab = b - a
-        ac = c - a
-        area_normal = wp.cross(ab, ac)
-        wp.atomic_add(normal_sums, i, area_normal)
-        wp.atomic_add(normal_sums, j, area_normal)
-        wp.atomic_add(normal_sums, k, area_normal)
-    @wp.kernel
-    def normalize_kernel(
-        normal_sums: wp.array(dtype=wp.vec3),
-        vertex_normals: wp.array(dtype=wp.vec3),
-    ):
-        tid = wp.tid()
-        vertex_normals[tid] = wp.normalize(normal_sums[tid])
-    @wp.func
-    def texture_interpolation(tex_interp: wp.vec2, texture: wp.array2d(dtype=wp.vec3)):
-        tex_width = texture.shape[1]
-        tex_height = texture.shape[0]
-        tex = wp.vec2(tex_interp[0] * float(tex_width - 1), (1.0 - tex_interp[1]) * float(tex_height - 1))
-        x0 = int(tex[0])
-        x1 = x0 + 1
-        alpha_x = tex[0] - float(x0)
-        y0 = int(tex[1])
-        y1 = y0 + 1
-        alpha_y = tex[1] - float(y0)
-        c00 = texture[y0, x0]
-        c10 = texture[y0, x1]
-        c01 = texture[y1, x0]
-        c11 = texture[y1, x1]
-        lower = (1.0 - alpha_x) * c00 + alpha_x * c10
-        upper = (1.0 - alpha_x) * c01 + alpha_x * c11
-        color = (1.0 - alpha_y) * lower + alpha_y * upper
-        return color
-    @wp.kernel
-    def draw_kernel(
-        mesh: RenderMesh,
-        camera: Camera,
-        texture: wp.array2d(dtype=wp.vec3),
-        rays_width: int,
-        rays_height: int,
-        rays: wp.array(dtype=wp.vec3),
-        lights: DirectionalLights,
-        mode: int,
-    ):
-        tid = wp.tid()
-        x = tid % rays_width
-        y = rays_height - tid // rays_width
-        sx = 2.0 * float(x) / float(rays_width) - 1.0
-        sy = 2.0 * float(y) / float(rays_height) - 1.0
-        # compute view ray in world space
-        ro_world = camera.pos
-        rd_world = wp.normalize(
-            wp.quat_rotate(camera.rot, wp.vec3(sx * camera.tan * camera.aspect, sy * camera.tan, -1.0))
+        # construct RenderMesh
+        self.render_mesh = RenderMesh()
+        self.mesh = wp.Mesh(
+            points=wp.array(points, dtype=wp.vec3, requires_grad=True), indices=wp.array(indices, dtype=int)
+        )
+        self.render_mesh.id = self.mesh.id
+        self.render_mesh.vertices = self.mesh.points
+        self.render_mesh.indices = self.mesh.indices
+        self.render_mesh.tex_coords = wp.array(tex_coords, dtype=wp.vec2, requires_grad=True)
+        self.render_mesh.tex_indices = wp.array(tex_indices, dtype=int)
+        self.normal_sums = wp.zeros(num_points, dtype=wp.vec3, requires_grad=True)
+        self.render_mesh.vertex_normals = wp.zeros(num_points, dtype=wp.vec3, requires_grad=True)
+        self.render_mesh.pos = wp.zeros(1, dtype=wp.vec3, requires_grad=True)
+        self.render_mesh.rot = wp.array(np.array(rot_array), dtype=wp.quat, requires_grad=True)
+        # compute vertex normals
+        wp.launch(
+            kernel=vertex_normal_sum_kernel,
+            dim=num_faces,
+            inputs=[self.render_mesh.vertices, self.render_mesh.indices, self.normal_sums],
+        )
+        wp.launch(
+            kernel=normalize_kernel,
+            dim=num_points,
+            inputs=[self.normal_sums, self.render_mesh.vertex_normals],
         )
-        # compute view ray in mesh space
-        inv = wp.transform_inverse(wp.transform(mesh.pos[0], mesh.rot[0]))
-        ro = wp.transform_point(inv, ro_world)
-        rd = wp.transform_vector(inv, rd_world)
-        t = float(0.0)
-        ur = float(0.0)
-        vr = float(0.0)
-        sign = float(0.0)
-        n = wp.vec3()
-        f = int(0)
-        color = wp.vec3(0.0, 0.0, 0.0)
-        if wp.mesh_query_ray(mesh.id, ro, rd, 1.0e6, t, ur, vr, sign, n, f):
-            i = mesh.indices[f * 3]
-            j = mesh.indices[f * 3 + 1]
-            k = mesh.indices[f * 3 + 2]
-            a = mesh.vertices[i]
-            b = mesh.vertices[j]
-            c = mesh.vertices[k]
-            p = wp.mesh_eval_position(mesh.id, f, ur, vr)
-            # barycentric coordinates
-            tri_area = wp.length(wp.cross(b - a, c - a))
-            w = wp.length(wp.cross(b - a, p - a)) / tri_area
-            v = wp.length(wp.cross(p - a, c - a)) / tri_area
-            u = 1.0 - w - v
-            a_n = mesh.vertex_normals[i]
-            b_n = mesh.vertex_normals[j]
-            c_n = mesh.vertex_normals[k]
-            # vertex normal interpolation
-            normal = u * a_n + v * b_n + w * c_n
-            if mode == 0 or mode == 1:
-                if mode == 0:  # grayscale
-                    color = wp.vec3(1.0)
-                elif mode == 1:  # texture interpolation
-                    tex_a = mesh.tex_coords[mesh.tex_indices[f * 3]]
-                    tex_b = mesh.tex_coords[mesh.tex_indices[f * 3 + 1]]
-                    tex_c = mesh.tex_coords[mesh.tex_indices[f * 3 + 2]]
-                    tex = u * tex_a + v * tex_b + w * tex_c
-                    color = Example.texture_interpolation(tex, texture)
-                # lambertian directional lighting
-                lambert = float(0.0)
-                for i in range(lights.num_lights):
-                    dir = wp.transform_vector(inv, lights.dirs[i])
-                    val = lights.intensities[i] * wp.dot(normal, dir)
-                    if val < 0.0:
-                        val = 0.0
-                    lambert = lambert + val
-                color = lambert * color
-            elif mode == 2:  # normal map
-                color = normal * 0.5 + wp.vec3(0.5, 0.5, 0.5)
-            if color[0] > 1.0:
-                color = wp.vec3(1.0, color[1], color[2])
-            if color[1] > 1.0:
-                color = wp.vec3(color[0], 1.0, color[2])
-            if color[2] > 1.0:
-                color = wp.vec3(color[0], color[1], 1.0)
-        rays[tid] = color
-    @wp.kernel
-    def downsample_kernel(
-        rays: wp.array(dtype=wp.vec3), pixels: wp.array(dtype=wp.vec3), rays_width: int, num_samples: int
-    ):
-        tid = wp.tid()
-        pixels_width = rays_width / num_samples
-        px = tid % pixels_width
-        py = tid // pixels_width
-        start_idx = py * num_samples * rays_width + px * num_samples
-        color = wp.vec3(0.0, 0.0, 0.0)
-        for i in range(0, num_samples):
-            for j in range(0, num_samples):
-                ray = rays[start_idx + i * rays_width + j]
-                color = wp.vec3(color[0] + ray[0], color[1] + ray[1], color[2] + ray[2])
-        num_samples_sq = float(num_samples * num_samples)
-        color = wp.vec3(color[0] / num_samples_sq, color[1] / num_samples_sq, color[2] / num_samples_sq)
-        pixels[tid] = color
-    @wp.kernel
-    def loss_kernel(
-        pixels: wp.array(dtype=wp.vec3), target_pixels: wp.array(dtype=wp.vec3), loss: wp.array(dtype=float)
-    ):
-        tid = wp.tid()
-        pixel = pixels[tid]
-        target_pixel = target_pixels[tid]
-        diff = target_pixel - pixel
-        # pseudo Huber loss
-        delta = 1.0
-        x = delta * delta * (wp.sqrt(1.0 + (diff[0] / delta) * (diff[0] / delta)) - 1.0)
-        y = delta * delta * (wp.sqrt(1.0 + (diff[1] / delta) * (diff[1] / delta)) - 1.0)
-        z = delta * delta * (wp.sqrt(1.0 + (diff[2] / delta) * (diff[2] / delta)) - 1.0)
-        sum = x + y + z
-        wp.atomic_add(loss, 0, sum)
-    @wp.kernel
-    def step_kernel(x: wp.array(dtype=wp.quat), grad: wp.array(dtype=wp.quat), alpha: float):
-        tid = wp.tid()
-        # projected gradient descent
-        x[tid] = wp.normalize(wp.sub(x[tid], wp.mul(grad[tid], alpha)))
-    def compute_loss(self):
-        self.render()
-        wp.launch(self.loss_kernel, dim=self.num_pixels, inputs=[self.pixels, self.target_pixels, self.loss])
-    def train_graph(self):
-        with wp.ScopedDevice("cuda:0"):
-            # capture graph
-            wp.capture_begin()
-            tape = wp.Tape()
-            with tape:
+        # construct camera
+        self.camera = Camera()
+        self.camera.horizontal = horizontal_aperture
+        self.camera.vertical = vertical_aperture
+        self.camera.aspect = aspect
+        self.camera.e = focal_length
+        self.camera.tan = vertical_aperture / (2.0 * focal_length)
+        self.camera.pos = cam_pos
+        self.camera.rot = cam_rot
+        # construct texture
+        self.texture = wp.array2d(texture_host, dtype=wp.vec3, requires_grad=True)
+        # construct lights
+        self.lights = DirectionalLights()
+        self.lights.dirs = wp.array(np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]]), dtype=wp.vec3, requires_grad=True)
+        self.lights.intensities = wp.array(np.array([2.0, 0.2]), dtype=float, requires_grad=True)
+        self.lights.num_lights = 2
+        # construct rays
+        self.rays_width = self.width * pow(2, self.num_samples)
+        self.rays_height = self.height * pow(2, self.num_samples)
+        self.num_rays = self.rays_width * self.rays_height
+        self.rays = wp.zeros(self.num_rays, dtype=wp.vec3, requires_grad=True)
+        # construct pixels
+        self.pixels = wp.zeros(self.num_pixels, dtype=wp.vec3, requires_grad=True)
+        self.target_pixels = wp.zeros(self.num_pixels, dtype=wp.vec3)
+        # loss array
+        self.loss = wp.zeros(1, dtype=float, requires_grad=True)
+        # capture graph
+        wp.capture_begin(self.device)
+        try:
+            self.tape = wp.Tape()
+            with self.tape:
                 self.compute_loss()
-            tape.backward(self.loss)
-            self.graph = wp.capture_end()
+            self.tape.backward(self.loss)
+        finally:
+            self.graph = wp.capture_end(self.device)
+        self.optimizer = SGD(
+            [self.render_mesh.rot],
+            self.train_rate,
+            momentum=self.momentum,
+            dampening=self.dampening,
+            weight_decay=self.weight_decay,
+        )
-            # train
-            image_counter = 0
-            for i in range(self.train_iters):
-                wp.capture_launch(self.graph)
-                rot_grad = tape.gradients[self.render_mesh.rot]
-                wp.launch(Example.step_kernel, dim=1, inputs=[self.render_mesh.rot, rot_grad, self.train_rate])
+    def ray_trace(self, is_live=False):
+        # raycast
+        wp.launch(
+            kernel=draw_kernel,
+            dim=self.num_rays,
+            inputs=[
+                self.render_mesh,
+                self.camera,
+                self.texture,
+                self.rays_width,
+                self.rays_height,
+                self.rays,
+                self.lights,
+                self.render_mode,
+            ],
+            device=self.device,
+        )
-                if i % self.period == 0:
-                    print(f"Iter: {i} Loss: {self.loss}")
-                    self.images[:, :, :, image_counter] = self.get_image()
-                    image_counter += 1
+        # downsample
+        wp.launch(
+            kernel=downsample_kernel,
+            dim=self.num_pixels,
+            inputs=[self.rays, self.pixels, self.rays_width, pow(2, self.num_samples)],
+            device=self.device,
+        )
-                tape.zero()
-                self.loss.zero_()
+    def compute_loss(self):
+        self.ray_trace()
+        wp.launch(
+            loss_kernel, dim=self.num_pixels, inputs=[self.pixels, self.target_pixels, self.loss], device=self.device
+        )
     def get_image(self):
         return self.pixels.numpy().reshape((self.height, self.width, 3))
@@ -483,33 +484,59 @@ class Example:
         ani = animation.ArtistAnimation(fig, frames, interval=50, blit=True, repeat_delay=1000)
         return ani
+    def update(self):
+        wp.capture_launch(self.graph)
+        rot_grad = self.tape.gradients[self.render_mesh.rot]
+        self.optimizer.step([rot_grad])
+        wp.launch(normalize, dim=1, inputs=[self.render_mesh.rot])
+        if self.verbose and self.iter % self.period == 0:
+            print(f"Iter: {self.iter} Loss: {self.loss}")
+        self.tape.zero()
+        self.loss.zero_()
+        self.iter = self.iter + 1
+    def render(self):
+        self.images[:, :, :, self.image_counter] = self.get_image()
+        self.image_counter += 1
+    def train_graph(self):
+        # train
+        for i in range(self.train_iters):
+            self.update()
+            if i % self.period == 0:
+                self.render()
 if __name__ == "__main__":
+    import matplotlib.animation as animation
+    import matplotlib.image as img
+    import matplotlib.pyplot as plt
     output_dir = os.path.join(os.path.dirname(__file__), "outputs")
-    example = Example()
+    reference_example = Example()
     # render target rotation
-    example.render()
-    with wp.ScopedDevice(device="cuda:0"):
-        wp.copy(example.target_pixels, example.pixels)
-    target_image = example.get_image()
+    reference_example.ray_trace()
+    target_image = reference_example.get_image()
     img.imsave(output_dir + "/target_image.png", target_image)
     # offset mesh rotation
-    with wp.ScopedDevice(device="cuda:0"):
-        example.render_mesh.rot = wp.array(
-            np.array(
-                [0.0, (math.sqrt(3) - 1) / (2.0 * math.sqrt(2.0)), 0.0, (math.sqrt(3) + 1) / (2.0 * math.sqrt(2.0))]
-            ),
-            dtype=wp.quat,
-            requires_grad=True,
-        )
+    rotated_example = Example(
+        rot_array=[0.0, (math.sqrt(3) - 1) / (2.0 * math.sqrt(2.0)), 0.0, (math.sqrt(3) + 1) / (2.0 * math.sqrt(2.0))],
+        verbose=True,
+    )
+    wp.copy(rotated_example.target_pixels, reference_example.pixels)
     # recover target rotation
-    example.train_graph()
-    final_image = example.get_image()
+    rotated_example.train_graph()
+    final_image = rotated_example.get_image()
     img.imsave(output_dir + "/final_image.png", final_image)
-    video = example.get_animation()
-    video.save(output_dir + "/animation.gif", dpi=300, writer=animation.PillowWriter(fps=15))
+    video = rotated_example.get_animation()
+    video.save(output_dir + "/animation.gif", dpi=300, writer=animation.PillowWriter(fps=5))