PyPI - warp-lang - Versions diffs - 1.1.0__py3-none-manylinux2014_aarch64.whl → 1.2.1__py3-none-manylinux2014_aarch64.whl - Mend

warp-lang 1.1.0__py3-none-manylinux2014_aarch64.whl → 1.2.1__py3-none-manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (218)

warp/bin/warp-clang.so +0 -0
warp/bin/warp.so +0 -0
warp/build.py +10 -37
warp/build_dll.py +2 -2
warp/builtins.py +274 -6
warp/codegen.py +51 -4
warp/config.py +2 -2
warp/constants.py +4 -0
warp/context.py +422 -203
warp/examples/benchmarks/benchmark_api.py +0 -2
warp/examples/benchmarks/benchmark_cloth_warp.py +0 -1
warp/examples/benchmarks/benchmark_launches.py +0 -2
warp/examples/core/example_dem.py +0 -2
warp/examples/core/example_fluid.py +0 -2
warp/examples/core/example_graph_capture.py +0 -2
warp/examples/core/example_marching_cubes.py +0 -2
warp/examples/core/example_mesh.py +0 -2
warp/examples/core/example_mesh_intersect.py +0 -2
warp/examples/core/example_nvdb.py +0 -2
warp/examples/core/example_raycast.py +0 -2
warp/examples/core/example_raymarch.py +0 -2
warp/examples/core/example_render_opengl.py +0 -2
warp/examples/core/example_sph.py +0 -2
warp/examples/core/example_torch.py +0 -3
warp/examples/core/example_wave.py +0 -2
warp/examples/fem/example_apic_fluid.py +140 -115
warp/examples/fem/example_burgers.py +262 -0
warp/examples/fem/example_convection_diffusion.py +0 -2
warp/examples/fem/example_convection_diffusion_dg.py +0 -2
warp/examples/fem/example_deformed_geometry.py +0 -2
warp/examples/fem/example_diffusion.py +0 -2
warp/examples/fem/example_diffusion_3d.py +5 -4
warp/examples/fem/example_diffusion_mgpu.py +0 -2
warp/examples/fem/example_mixed_elasticity.py +0 -2
warp/examples/fem/example_navier_stokes.py +0 -2
warp/examples/fem/example_stokes.py +0 -2
warp/examples/fem/example_stokes_transfer.py +0 -2
warp/examples/optim/example_bounce.py +0 -2
warp/examples/optim/example_cloth_throw.py +0 -2
warp/examples/optim/example_diffray.py +0 -2
warp/examples/optim/example_drone.py +0 -2
warp/examples/optim/example_inverse_kinematics.py +0 -2
warp/examples/optim/example_inverse_kinematics_torch.py +0 -2
warp/examples/optim/example_spring_cage.py +0 -2
warp/examples/optim/example_trajectory.py +0 -2
warp/examples/optim/example_walker.py +0 -2
warp/examples/sim/example_cartpole.py +0 -2
warp/examples/sim/example_cloth.py +0 -2
warp/examples/sim/example_granular.py +0 -2
warp/examples/sim/example_granular_collision_sdf.py +0 -2
warp/examples/sim/example_jacobian_ik.py +0 -2
warp/examples/sim/example_particle_chain.py +0 -2
warp/examples/sim/example_quadruped.py +0 -2
warp/examples/sim/example_rigid_chain.py +0 -2
warp/examples/sim/example_rigid_contact.py +0 -2
warp/examples/sim/example_rigid_force.py +0 -2
warp/examples/sim/example_rigid_gyroscopic.py +0 -2
warp/examples/sim/example_rigid_soft_contact.py +0 -2
warp/examples/sim/example_soft_body.py +0 -2
warp/fem/__init__.py +1 -0
warp/fem/cache.py +3 -1
warp/fem/geometry/__init__.py +1 -0
warp/fem/geometry/element.py +4 -0
warp/fem/geometry/grid_3d.py +0 -4
warp/fem/geometry/nanogrid.py +455 -0
warp/fem/integrate.py +63 -9
warp/fem/space/__init__.py +43 -158
warp/fem/space/basis_space.py +34 -0
warp/fem/space/collocated_function_space.py +1 -1
warp/fem/space/grid_2d_function_space.py +13 -132
warp/fem/space/grid_3d_function_space.py +16 -154
warp/fem/space/hexmesh_function_space.py +37 -134
warp/fem/space/nanogrid_function_space.py +202 -0
warp/fem/space/quadmesh_2d_function_space.py +12 -119
warp/fem/space/restriction.py +4 -1
warp/fem/space/shape/__init__.py +77 -0
warp/fem/space/shape/cube_shape_function.py +5 -15
warp/fem/space/tetmesh_function_space.py +6 -76
warp/fem/space/trimesh_2d_function_space.py +6 -76
warp/native/array.h +12 -3
warp/native/builtin.h +48 -5
warp/native/bvh.cpp +14 -10
warp/native/bvh.cu +23 -15
warp/native/bvh.h +1 -0
warp/native/clang/clang.cpp +2 -1
warp/native/crt.cpp +11 -1
warp/native/crt.h +18 -1
warp/native/exports.h +187 -0
warp/native/mat.h +47 -0
warp/native/mesh.cpp +1 -1
warp/native/mesh.cu +1 -2
warp/native/nanovdb/GridHandle.h +366 -0
warp/native/nanovdb/HostBuffer.h +590 -0
warp/native/nanovdb/NanoVDB.h +3999 -2157
warp/native/nanovdb/PNanoVDB.h +936 -99
warp/native/quat.h +28 -1
warp/native/rand.h +5 -1
warp/native/vec.h +45 -1
warp/native/volume.cpp +335 -103
warp/native/volume.cu +39 -13
warp/native/volume.h +725 -303
warp/native/volume_builder.cu +381 -360
warp/native/volume_builder.h +16 -1
warp/native/volume_impl.h +61 -0
warp/native/warp.cu +8 -2
warp/native/warp.h +15 -7
warp/render/render_opengl.py +191 -52
warp/sim/integrator_featherstone.py +10 -3
warp/sim/integrator_xpbd.py +16 -22
warp/sparse.py +89 -27
warp/stubs.py +83 -0
warp/tests/assets/test_index_grid.nvdb +0 -0
warp/tests/aux_test_dependent.py +0 -2
warp/tests/aux_test_grad_customs.py +0 -2
warp/tests/aux_test_reference.py +0 -2
warp/tests/aux_test_reference_reference.py +0 -2
warp/tests/aux_test_square.py +0 -2
warp/tests/disabled_kinematics.py +0 -2
warp/tests/test_adam.py +0 -2
warp/tests/test_arithmetic.py +0 -36
warp/tests/test_array.py +9 -11
warp/tests/test_array_reduce.py +0 -2
warp/tests/test_async.py +0 -2
warp/tests/test_atomic.py +0 -2
warp/tests/test_bool.py +58 -50
warp/tests/test_builtins_resolution.py +0 -2
warp/tests/test_bvh.py +0 -2
warp/tests/test_closest_point_edge_edge.py +0 -1
warp/tests/test_codegen.py +0 -4
warp/tests/test_compile_consts.py +130 -10
warp/tests/test_conditional.py +0 -2
warp/tests/test_copy.py +0 -2
warp/tests/test_ctypes.py +6 -8
warp/tests/test_dense.py +0 -2
warp/tests/test_devices.py +0 -2
warp/tests/test_dlpack.py +9 -11
warp/tests/test_examples.py +42 -39
warp/tests/test_fabricarray.py +0 -3
warp/tests/test_fast_math.py +0 -2
warp/tests/test_fem.py +75 -54
warp/tests/test_fp16.py +0 -2
warp/tests/test_func.py +0 -2
warp/tests/test_generics.py +27 -2
warp/tests/test_grad.py +147 -8
warp/tests/test_grad_customs.py +0 -2
warp/tests/test_hash_grid.py +1 -3
warp/tests/test_import.py +0 -2
warp/tests/test_indexedarray.py +0 -2
warp/tests/test_intersect.py +0 -2
warp/tests/test_jax.py +0 -2
warp/tests/test_large.py +11 -9
warp/tests/test_launch.py +0 -2
warp/tests/test_lerp.py +10 -54
warp/tests/test_linear_solvers.py +3 -5
warp/tests/test_lvalue.py +0 -2
warp/tests/test_marching_cubes.py +0 -2
warp/tests/test_mat.py +0 -2
warp/tests/test_mat_lite.py +0 -2
warp/tests/test_mat_scalar_ops.py +0 -2
warp/tests/test_math.py +0 -2
warp/tests/test_matmul.py +35 -37
warp/tests/test_matmul_lite.py +29 -31
warp/tests/test_mempool.py +0 -2
warp/tests/test_mesh.py +0 -3
warp/tests/test_mesh_query_aabb.py +0 -2
warp/tests/test_mesh_query_point.py +0 -2
warp/tests/test_mesh_query_ray.py +0 -2
warp/tests/test_mlp.py +0 -2
warp/tests/test_model.py +0 -2
warp/tests/test_module_hashing.py +111 -0
warp/tests/test_modules_lite.py +0 -3
warp/tests/test_multigpu.py +0 -2
warp/tests/test_noise.py +0 -4
warp/tests/test_operators.py +0 -2
warp/tests/test_options.py +0 -2
warp/tests/test_peer.py +0 -2
warp/tests/test_pinned.py +0 -2
warp/tests/test_print.py +0 -2
warp/tests/test_quat.py +0 -2
warp/tests/test_rand.py +41 -5
warp/tests/test_reload.py +0 -10
warp/tests/test_rounding.py +0 -2
warp/tests/test_runlength_encode.py +0 -2
warp/tests/test_sim_grad.py +0 -2
warp/tests/test_sim_kinematics.py +0 -2
warp/tests/test_smoothstep.py +0 -2
warp/tests/test_snippet.py +0 -2
warp/tests/test_sparse.py +0 -2
warp/tests/test_spatial.py +0 -2
warp/tests/test_special_values.py +362 -0
warp/tests/test_streams.py +0 -2
warp/tests/test_struct.py +0 -2
warp/tests/test_tape.py +0 -2
warp/tests/test_torch.py +0 -2
warp/tests/test_transient_module.py +0 -2
warp/tests/test_types.py +0 -2
warp/tests/test_utils.py +0 -2
warp/tests/test_vec.py +0 -2
warp/tests/test_vec_lite.py +0 -2
warp/tests/test_vec_scalar_ops.py +0 -2
warp/tests/test_verify_fp.py +0 -2
warp/tests/test_volume.py +237 -13
warp/tests/test_volume_write.py +86 -3
warp/tests/unittest_serial.py +10 -9
warp/tests/unittest_suites.py +6 -2
warp/tests/unittest_utils.py +2 -171
warp/tests/unused_test_misc.py +0 -2
warp/tests/walkthrough_debug.py +1 -1
warp/thirdparty/unittest_parallel.py +37 -40
warp/types.py +526 -85
{warp_lang-1.1.0.dist-info → warp_lang-1.2.1.dist-info}/METADATA +61 -31
warp_lang-1.2.1.dist-info/RECORD +359 -0
warp/examples/fem/example_convection_diffusion_dg0.py +0 -204
warp/native/nanovdb/PNanoVDBWrite.h +0 -295
warp_lang-1.1.0.dist-info/RECORD +0 -352
{warp_lang-1.1.0.dist-info → warp_lang-1.2.1.dist-info}/LICENSE.md +0 -0
{warp_lang-1.1.0.dist-info → warp_lang-1.2.1.dist-info}/WHEEL +0 -0
{warp_lang-1.1.0.dist-info → warp_lang-1.2.1.dist-info}/top_level.txt +0 -0

warp/native/volume_builder.h CHANGED

@@ -10,8 +10,23 @@ struct BuildGridParams {
     char name[256] = "";
 };
+template<>
+struct BuildGridParams<nanovdb::ValueIndex> {
+    double voxel_size = 1.0;
+    nanovdb::ValueIndex background_value;
+    nanovdb::Vec3d translation{0.0, 0.0, 0.0};
+    char name[256] = "";
+};
+template<>
+struct BuildGridParams<nanovdb::ValueOnIndex> {
+    double voxel_size = 1.0;
+    nanovdb::Vec3d translation{0.0, 0.0, 0.0};
+    char name[256] = "";
+};
 template <typename BuildT>
-void build_grid_from_tiles(nanovdb::Grid<nanovdb::NanoTree<BuildT>> *&out_grid,
+void build_grid_from_points(nanovdb::Grid<nanovdb::NanoTree<BuildT>> *&out_grid,
                            size_t &out_grid_size,
                            const void *points,
                            size_t num_points,

warp/native/volume_impl.h ADDED

@@ -0,0 +1,61 @@
+/** Copyright (c) 2022 NVIDIA CORPORATION.  All rights reserved.
+ * NVIDIA CORPORATION and its licensors retain all intellectual property
+ * and proprietary rights in and to this software, related documentation
+ * and any modifications thereto.  Any use, reproduction, disclosure or
+ * distribution of this software and related documentation without an express
+ * license agreement from NVIDIA CORPORATION is strictly prohibited.
+ */
+#pragma once
+#include "volume.h"
+// Helper functions for cpp/cu files, not to be exposed to user kernels
+namespace wp
+{
+namespace volume
+{
+inline CUDA_CALLABLE pnanovdb_leaf_handle_t get_leaf(const pnanovdb_buf_t buf, const uint32_t leaf_id)
+{
+    const pnanovdb_tree_handle_t tree = get_tree(buf);
+    const uint64_t first_leaf_offset = pnanovdb_tree_get_node_offset_leaf(buf, tree);
+    const uint32_t leaf_stride = PNANOVDB_GRID_TYPE_GET(get_grid_type(buf), leaf_size);
+    return {pnanovdb_address_offset64(tree.address, first_leaf_offset + uint64_t(leaf_id) * leaf_stride)};
+}
+inline CUDA_CALLABLE pnanovdb_coord_t leaf_origin(const pnanovdb_buf_t buf, const pnanovdb_leaf_handle_t leaf)
+{
+    pnanovdb_coord_t origin = pnanovdb_leaf_get_bbox_min(buf, leaf);
+    // mask out last three bits corresponding to voxel coordinates within leaf
+    constexpr uint32_t MASK = (1u << 3u) - 1u;
+    origin.x &= ~MASK;
+    origin.y &= ~MASK;
+    origin.z &= ~MASK;
+    return origin;
+}
+inline CUDA_CALLABLE uint64_t leaf_voxel_index(const pnanovdb_buf_t buf, const uint32_t leaf_id,
+                                               const pnanovdb_coord_t &ijk)
+{
+    const uint32_t grid_type = get_grid_type(buf);
+    const pnanovdb_leaf_handle_t leaf = get_leaf(buf, leaf_id);
+    const pnanovdb_address_t value_address = pnanovdb_leaf_get_value_address(grid_type, buf, leaf, &ijk);
+    return volume::get_grid_voxel_index(grid_type, buf, value_address, ijk) - 1;
+}
+inline CUDA_CALLABLE pnanovdb_coord_t leaf_offset_to_local_coord(uint32_t offset)
+{
+    pnanovdb_coord_t coord;
+    coord.x = (offset >> 6) & 7;
+    coord.y = (offset >> 3) & 7;
+    coord.z = (offset >> 0) & 7;
+    return coord;
+}
+} // namespace volume
+} // namespace wp

warp/native/warp.cu CHANGED

@@ -18,6 +18,7 @@
 #include <iterator>
 #include <list>
 #include <map>
+#include <string>
 #include <unordered_map>
 #include <unordered_set>
 #include <vector>
@@ -2536,7 +2537,7 @@ size_t cuda_compile_program(const char* cuda_src, int arch, const char* include_
     std::vector<const char*> opts;
     opts.push_back(arch_opt);
     opts.push_back(include_opt);
-    opts.push_back("--std=c++11");
+    opts.push_back("--std=c++17");
     if (debug)
     {
@@ -2864,7 +2865,12 @@ void* cuda_graphics_register_gl_buffer(void* context, uint32_t gl_buffer, unsign
     ContextGuard guard(context);
     CUgraphicsResource *resource = new CUgraphicsResource;
-    check_cu(cuGraphicsGLRegisterBuffer_f(resource, gl_buffer, flags));
+    bool success = check_cu(cuGraphicsGLRegisterBuffer_f(resource, gl_buffer, flags));
+    if (!success)
+    {
+        delete resource;
+        return NULL;
+    }
     return resource;
 }

warp/native/warp.h CHANGED

@@ -97,20 +97,28 @@ extern "C"
                              const void* a, const void* b, const void* c, void* d, float alpha, float beta,
                              bool row_major_a, bool row_major_b, bool allow_tf32x3_arith, int batch_count);
-    WP_API uint64_t volume_create_host(void* buf, uint64_t size);
-    WP_API void volume_get_buffer_info_host(uint64_t id, void** buf, uint64_t* size);
-    WP_API void volume_get_tiles_host(uint64_t id, void** buf, uint64_t* size);
+    WP_API uint64_t volume_create_host(void* buf, uint64_t size, bool copy, bool owner);
+    WP_API void volume_get_tiles_host(uint64_t id, void* buf);
+    WP_API void volume_get_voxels_host(uint64_t id, void* buf);
     WP_API void volume_destroy_host(uint64_t id);
-    WP_API uint64_t volume_create_device(void* context, void* buf, uint64_t size);
+    WP_API uint64_t volume_create_device(void* context, void* buf, uint64_t size, bool copy, bool owner);
+    WP_API void volume_get_tiles_device(uint64_t id, void* buf);
+    WP_API void volume_get_voxels_device(uint64_t id, void* buf);
+    WP_API void volume_destroy_device(uint64_t id);
     WP_API uint64_t volume_f_from_tiles_device(void* context, void* points, int num_points, float voxel_size, float bg_value, float tx, float ty, float tz, bool points_in_world_space);
     WP_API uint64_t volume_v_from_tiles_device(void* context, void* points, int num_points, float voxel_size, float bg_value_x, float bg_value_y, float bg_value_z, float tx, float ty, float tz, bool points_in_world_space);
     WP_API uint64_t volume_i_from_tiles_device(void* context, void* points, int num_points, float voxel_size, int bg_value, float tx, float ty, float tz, bool points_in_world_space);
-    WP_API void volume_get_buffer_info_device(uint64_t id, void** buf, uint64_t* size);
-    WP_API void volume_get_tiles_device(uint64_t id, void** buf, uint64_t* size);
-    WP_API void volume_destroy_device(uint64_t id);
+    WP_API uint64_t volume_index_from_tiles_device(void* context, void* points, int num_points, float voxel_size, float tx, float ty, float tz, bool points_in_world_space);
+    WP_API uint64_t volume_from_active_voxels_device(void* context, void* points, int num_points, float voxel_size, float tx, float ty, float tz, bool points_in_world_space);
+    WP_API void volume_get_buffer_info(uint64_t id, void** buf, uint64_t* size);
     WP_API void volume_get_voxel_size(uint64_t id, float* dx, float* dy, float* dz);
+    WP_API void volume_get_tile_and_voxel_count(uint64_t id, uint32_t& tile_count, uint64_t& voxel_count);
+    WP_API const char* volume_get_grid_info(uint64_t id, uint64_t *grid_size, uint32_t *grid_index, uint32_t *grid_count, float translation[3], float transform[9], char type_str[16]);
+    WP_API uint32_t volume_get_blind_data_count(uint64_t id);
+    WP_API const char* volume_get_blind_data_info(uint64_t id, uint32_t data_index, void** buf, uint64_t* value_count, uint32_t* value_size, char type_str[16]);
     WP_API uint64_t marching_cubes_create_device(void* context);
     WP_API void marching_cubes_destroy_device(uint64_t id);

warp/render/render_opengl.py CHANGED

@@ -469,6 +469,24 @@ def copy_rgb_frame(
     output_img[v, w, 2] = b / 255.0
+@wp.kernel
+def copy_rgb_frame_uint8(
+    input_img: wp.array(dtype=wp.uint8),
+    width: int,
+    height: int,
+    # outputs
+    output_img: wp.array(dtype=wp.uint8, ndim=3),
+):
+    w, v = wp.tid()
+    pixel = v * width + w
+    pixel *= 3
+    # flip vertically (OpenGL coordinates start at bottom)
+    v = height - v - 1
+    output_img[v, w, 0] = input_img[pixel + 0]
+    output_img[v, w, 1] = input_img[pixel + 1]
+    output_img[v, w, 2] = input_img[pixel + 2]
 @wp.kernel
 def copy_depth_frame(
     input_img: wp.array(dtype=wp.float32),
@@ -519,6 +537,34 @@ def copy_rgb_frame_tiles(
     output_img[tile, y, x, 2] = b / 255.0
+@wp.kernel
+def copy_rgb_frame_tiles_uint8(
+    input_img: wp.array(dtype=wp.uint8),
+    positions: wp.array(dtype=int, ndim=2),
+    screen_width: int,
+    screen_height: int,
+    tile_height: int,
+    # outputs
+    output_img: wp.array(dtype=wp.uint8, ndim=4),
+):
+    tile, x, y = wp.tid()
+    p = positions[tile]
+    qx = x + p[0]
+    qy = y + p[1]
+    pixel = qy * screen_width + qx
+    # flip vertically (OpenGL coordinates start at bottom)
+    y = tile_height - y - 1
+    if qx >= screen_width or qy >= screen_height:
+        output_img[tile, y, x, 0] = wp.uint8(0)
+        output_img[tile, y, x, 1] = wp.uint8(0)
+        output_img[tile, y, x, 2] = wp.uint8(0)
+        return  # prevent out-of-bounds access
+    pixel *= 3
+    output_img[tile, y, x, 0] = input_img[pixel + 0]
+    output_img[tile, y, x, 1] = input_img[pixel + 1]
+    output_img[tile, y, x, 2] = input_img[pixel + 2]
 @wp.kernel
 def copy_depth_frame_tiles(
     input_img: wp.array(dtype=wp.float32),
@@ -577,6 +623,34 @@ def copy_rgb_frame_tile(
     output_img[tile, y, x, 2] = b / 255.0
+@wp.kernel
+def copy_rgb_frame_tile_uint8(
+    input_img: wp.array(dtype=wp.uint8),
+    offset_x: int,
+    offset_y: int,
+    screen_width: int,
+    screen_height: int,
+    tile_height: int,
+    # outputs
+    output_img: wp.array(dtype=wp.uint8, ndim=4),
+):
+    tile, x, y = wp.tid()
+    qx = x + offset_x
+    qy = y + offset_y
+    pixel = qy * screen_width + qx
+    # flip vertically (OpenGL coordinates start at bottom)
+    y = tile_height - y - 1
+    if qx >= screen_width or qy >= screen_height:
+        output_img[tile, y, x, 0] = wp.uint8(0)
+        output_img[tile, y, x, 1] = wp.uint8(0)
+        output_img[tile, y, x, 2] = wp.uint8(0)
+        return  # prevent out-of-bounds access
+    pixel *= 3
+    output_img[tile, y, x, 0] = input_img[pixel + 0]
+    output_img[tile, y, x, 1] = input_img[pixel + 1]
+    output_img[tile, y, x, 2] = input_img[pixel + 2]
 def check_gl_error():
     from pyglet import gl
@@ -860,7 +934,7 @@ class OpenGLRenderer:
     def __init__(
         self,
-        title="Warp sim",
+        title="Warp",
         scaling=1.0,
         fps=60,
         up_axis="Y",
@@ -881,11 +955,57 @@ class OpenGLRenderer:
         render_depth=False,
         axis_scale=1.0,
         vsync=False,
-        headless=False,
+        headless=None,
         enable_backface_culling=True,
         enable_mouse_interaction=True,
         enable_keyboard_interaction=True,
     ):
+        """
+        Args:
+            title (str): The window title.
+            scaling (float): The scaling factor for the scene.
+            fps (int): The target frames per second.
+            up_axis (str): The up axis of the scene. Can be "X", "Y", or "Z".
+            screen_width (int): The width of the window.
+            screen_height (int): The height of the window.
+            near_plane (float): The near clipping plane.
+            far_plane (float): The far clipping plane.
+            camera_fov (float): The camera field of view in degrees.
+            camera_pos (tuple): The initial camera position.
+            camera_front (tuple): The initial camera front direction.
+            camera_up (tuple): The initial camera up direction.
+            background_color (tuple): The background color of the scene.
+            draw_grid (bool): Whether to draw a grid indicating the ground plane.
+            draw_sky (bool): Whether to draw a sky sphere.
+            draw_axis (bool): Whether to draw the coordinate system axes.
+            show_info (bool): Whether to overlay rendering information.
+            render_wireframe (bool): Whether to render scene shapes as wireframes.
+            render_depth (bool): Whether to show the depth buffer instead of the RGB image.
+            axis_scale (float): The scale of the coordinate system axes being rendered (only if ``draw_axis`` is True).
+            vsync (bool): Whether to enable vertical synchronization.
+            headless (bool): Whether to run in headless mode (no window is created). If None, the value is determined by the Pyglet configuration defined in ``pyglet.options["headless"]``.
+            enable_backface_culling (bool): Whether to enable backface culling.
+            enable_mouse_interaction (bool): Whether to enable mouse interaction.
+            enable_keyboard_interaction (bool): Whether to enable keyboard interaction.
+        Note:
+            :class:`OpenGLRenderer` requires Pyglet (version >= 2.0, known to work on 2.0.7) to be installed.
+            Headless rendering is supported via EGL on UNIX operating systems. To enable headless rendering, set the following pyglet options before importing ``warp.render``:
+            .. code-block:: python
+                import pyglet
+                pyglet.options["headless"] = True
+                import warp.render
+                # OpenGLRenderer is instantiated with headless=True by default
+                renderer = warp.render.OpenGLRenderer()
+        """
         try:
             import pyglet
@@ -917,10 +1037,15 @@ class OpenGLRenderer:
         self.window = pyglet.window.Window(
             width=screen_width, height=screen_height, caption=title, resizable=True, vsync=vsync, visible=not headless
         )
+        if headless is None:
+            self.headless = pyglet.options.get("headless", False)
+        else:
+            self.headless = headless
         self.app = pyglet.app
-        # making window current opengl rendering context
-        self.window.switch_to()
+        if not headless:
+            # making window current opengl rendering context
+            self.window.switch_to()
         self.screen_width, self.screen_height = self.window.get_framebuffer_size()
@@ -1011,15 +1136,16 @@ class OpenGLRenderer:
         self._frame_fbo = None
         self._frame_pbo = None
-        self.window.push_handlers(on_draw=self._draw)
-        self.window.push_handlers(on_resize=self._window_resize_callback)
-        self.window.push_handlers(on_key_press=self._key_press_callback)
+        if not headless:
+            self.window.push_handlers(on_draw=self._draw)
+            self.window.push_handlers(on_resize=self._window_resize_callback)
+            self.window.push_handlers(on_key_press=self._key_press_callback)
-        self._key_handler = pyglet.window.key.KeyStateHandler()
-        self.window.push_handlers(self._key_handler)
+            self._key_handler = pyglet.window.key.KeyStateHandler()
+            self.window.push_handlers(self._key_handler)
-        self.window.on_mouse_scroll = self._scroll_callback
-        self.window.on_mouse_drag = self._mouse_drag_callback
+            self.window.on_mouse_scroll = self._scroll_callback
+            self.window.on_mouse_drag = self._mouse_drag_callback
         gl.glClearColor(*self.background_color, 1)
         gl.glEnable(gl.GL_DEPTH_TEST)
@@ -1232,20 +1358,21 @@ class OpenGLRenderer:
             width=400,
         )
-        # set up our own event handling so we can synchronously render frames
-        # by calling update() in a loop
-        from pyglet.window import Window
+        if not headless:
+            # set up our own event handling so we can synchronously render frames
+            # by calling update() in a loop
+            from pyglet.window import Window
-        Window._enable_event_queue = False
+            Window._enable_event_queue = False
-        self.window.switch_to()
-        self.window.dispatch_pending_events()
+            self.window.switch_to()
+            self.window.dispatch_pending_events()
-        platform_event_loop = self.app.platform_event_loop
-        platform_event_loop.start()
+            platform_event_loop = self.app.platform_event_loop
+            platform_event_loop.start()
-        # start event loop
-        self.app.event_loop.dispatch_event("on_enter")
+            # start event loop
+            self.app.event_loop.dispatch_event("on_enter")
     @property
     def paused(self):
@@ -1266,8 +1393,9 @@ class OpenGLRenderer:
     def clear(self):
         from pyglet import gl
-        self.app.event_loop.dispatch_event("on_exit")
-        self.app.platform_event_loop.stop()
+        if not self.headless:
+            self.app.event_loop.dispatch_event("on_exit")
+            self.app.platform_event_loop.stop()
         if self._instance_transform_gl_buffer is not None:
             try:
@@ -1334,7 +1462,8 @@ class OpenGLRenderer:
         """
         Set up tiled rendering where the render buffer is split into multiple tiles that can visualize
         different shape instances of the scene with different view and projection matrices.
-        See `get_pixels` which allows to retrieve the pixels of for each tile.
+        See :meth:`get_pixels` which allows to retrieve the pixels of for each tile.
+        See :meth:`update_tile` which allows to update the shape instances, projection matrix, view matrix, tile size, or tile position for a given tile.
         :param instances: A list of lists of shape instance ids. Each list of shape instance ids
             will be rendered into a separate tile.
@@ -1516,7 +1645,7 @@ class OpenGLRenderer:
             )
             if gl.glCheckFramebufferStatus(gl.GL_FRAMEBUFFER) != gl.GL_FRAMEBUFFER_COMPLETE:
-                print("Framebuffer is not complete!")
+                print("Framebuffer is not complete!", flush=True)
                 gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, 0)
                 sys.exit(1)
@@ -1590,9 +1719,21 @@ class OpenGLRenderer:
     def camera_up(self, value):
         self.update_view_matrix(cam_up=value)
-    def update_view_matrix(self, cam_pos=None, cam_front=None, cam_up=None, stiffness=1.0):
+    def compute_view_matrix(self, cam_pos, cam_front, cam_up):
         from pyglet.math import Mat4, Vec3
+        model = np.array(self._model_matrix).reshape((4, 4))
+        cp = model @ np.array([*cam_pos / self._scaling, 1.0])
+        cf = model @ np.array([*cam_front / self._scaling, 1.0])
+        up = model @ np.array([*cam_up / self._scaling, 0.0])
+        cp = Vec3(*cp[:3])
+        cf = Vec3(*cf[:3])
+        up = Vec3(*up[:3])
+        return np.array(Mat4.look_at(cp, cp + cf, up), dtype=np.float32)
+    def update_view_matrix(self, cam_pos=None, cam_front=None, cam_up=None, stiffness=1.0):
+        from pyglet.math import Vec3
         if cam_pos is not None:
             self._camera_pos = self._camera_pos * (1.0 - stiffness) + Vec3(*cam_pos) * stiffness
         if cam_front is not None:
@@ -1600,22 +1741,16 @@ class OpenGLRenderer:
         if cam_up is not None:
             self._camera_up = self._camera_up * (1.0 - stiffness) + Vec3(*cam_up) * stiffness
-        model = np.array(self._model_matrix).reshape((4, 4))
-        cp = model @ np.array([*self._camera_pos / self._scaling, 1.0])
-        cf = model @ np.array([*self._camera_front / self._scaling, 1.0])
-        up = model @ np.array([*self._camera_up / self._scaling, 0.0])
-        cp = Vec3(*cp[:3])
-        cf = Vec3(*cf[:3])
-        up = Vec3(*up[:3])
-        self._view_matrix = np.array(Mat4.look_at(cp, cp + cf, up))
+        self._view_matrix = self.compute_view_matrix(self._camera_pos, self._camera_front, self._camera_up)
-    def compute_model_matrix(self, camera_axis: int, scaling: float):
+    @staticmethod
+    def compute_model_matrix(camera_axis: int, scaling: float):
         if camera_axis == 0:
-            return np.array((0, 0, scaling, 0, scaling, 0, 0, 0, 0, scaling, 0, 0, 0, 0, 0, 1))
+            return np.array((0, 0, scaling, 0, scaling, 0, 0, 0, 0, scaling, 0, 0, 0, 0, 0, 1), dtype=np.float32)
         elif camera_axis == 2:
-            return np.array((-scaling, 0, 0, 0, 0, 0, scaling, 0, 0, scaling, 0, 0, 0, 0, 0, 1))
+            return np.array((-scaling, 0, 0, 0, 0, 0, scaling, 0, 0, scaling, 0, 0, 0, 0, 0, 1), dtype=np.float32)
-        return np.array((scaling, 0, 0, 0, 0, scaling, 0, 0, 0, 0, scaling, 0, 0, 0, 0, 1))
+        return np.array((scaling, 0, 0, 0, 0, scaling, 0, 0, 0, 0, scaling, 0, 0, 0, 0, 1), dtype=np.float32)
     def update_model_matrix(self, model_matrix: Optional[Mat44] = None):
         from pyglet import gl
@@ -1684,8 +1819,8 @@ class OpenGLRenderer:
         self._last_time = self.clock_time
         self._frame_speed = update_duration * 100.0
-        # self.app.event_loop.idle()
-        self.app.platform_event_loop.step(self._frame_dt * 1e-3)
+        if not self.headless:
+            self.app.platform_event_loop.step(self._frame_dt * 1e-3)
         if not self.skip_rendering:
             self._skip_frame_counter += 1
@@ -1705,13 +1840,17 @@ class OpenGLRenderer:
                     update = 1.0 / update_duration
                     self._fps_render = (1.0 - self._fps_alpha) * self._fps_render + self._fps_alpha * update
-            self.app.event_loop._redraw_windows(self._frame_dt * 1e-3)
+            if not self.headless:
+                self.app.event_loop._redraw_windows(self._frame_dt * 1e-3)
+            else:
+                self._draw()
     def _draw(self):
         from pyglet import gl
-        # catch key hold events
-        self._process_inputs()
+        if not self.headless:
+            # catch key hold events
+            self._process_inputs()
         if self.enable_backface_culling:
             gl.glEnable(gl.GL_CULL_FACE)
@@ -1860,7 +1999,9 @@ Instances: {len(self._instances)}"""
         for i, viewport in enumerate(self._tile_viewports):
             projection_matrix_ptr = arr_pointer(self._tile_projection_matrices[i])
-            view_matrix_ptr = arr_pointer(self._tile_view_matrices[i] or self._view_matrix)
+            view_matrix_ptr = arr_pointer(
+                self._tile_view_matrices[i] if self._tile_view_matrices[i] is not None else self._view_matrix
+            )
             gl.glViewport(*viewport)
             if self.draw_grid:
@@ -2289,7 +2430,7 @@ Instances: {len(self._instances)}"""
             self.clear()
             self.app.event_loop.exit()
-    def get_pixels(self, target_image: wp.array, split_up_tiles=True, mode="rgb"):
+    def get_pixels(self, target_image: wp.array, split_up_tiles=True, mode="rgb", use_uint8=False):
         """
         Read the pixels from the frame buffer (RGB or depth are supported) into the given array.
@@ -2302,6 +2443,7 @@ Instances: {len(self._instances)}"""
             target_image (array): The array to read the pixels into. Must have float32 as dtype and be on a CUDA device.
             split_up_tiles (bool): Whether to split up the viewport into tiles, see :meth:`setup_tiled_rendering`.
             mode (str): can be either "rgb" or "depth"
+            use_uint8 (bool): Whether to use uint8 as dtype in RGB mode for the target_image array and return values in the range [0, 255]. Otherwise, float32 is assumed as dtype with values in the range [0, 1].
         Returns:
             bool: Whether the pixels were successfully read.
@@ -2328,7 +2470,7 @@ Instances: {len(self._instances)}"""
                     self._tile_width,
                     channels,
                 )
-            ), f"Shape of `target_image` array does not match {self.num_tiles} x {self.screen_height} x {self.screen_width} x {channels}"
+            ), f"Shape of `target_image` array does not match {self.num_tiles} x {self._tile_height} x {self._tile_width} x {channels}"
         else:
             assert target_image.shape == (
                 self.screen_height,
@@ -2355,9 +2497,7 @@ Instances: {len(self._instances)}"""
         gl.glBindTexture(gl.GL_TEXTURE_2D, 0)
         gl.glBindBuffer(gl.GL_PIXEL_PACK_BUFFER, 0)
-        pbo_buffer = wp.RegisteredGLBuffer(
-            int(self._frame_pbo.value), self._device, wp.RegisteredGLBuffer.WRITE_DISCARD
-        )
+        pbo_buffer = wp.RegisteredGLBuffer(int(self._frame_pbo.value), self._device, wp.RegisteredGLBuffer.READ_ONLY)
         screen_size = self.screen_height * self.screen_width
         if mode == "rgb":
             img = pbo_buffer.map(dtype=wp.uint8, shape=(screen_size * channels))
@@ -2368,7 +2508,7 @@ Instances: {len(self._instances)}"""
             positions = wp.array(self._tile_viewports, ndim=2, dtype=wp.int32, device=target_image.device)
             if mode == "rgb":
                 wp.launch(
-                    copy_rgb_frame_tiles,
+                    copy_rgb_frame_tiles_uint8 if use_uint8 else copy_rgb_frame_tiles,
                     dim=(self.num_tiles, self._tile_width, self._tile_height),
                     inputs=[img, positions, self.screen_width, self.screen_height, self._tile_height],
                     outputs=[target_image],
@@ -2393,7 +2533,7 @@ Instances: {len(self._instances)}"""
         else:
             if mode == "rgb":
                 wp.launch(
-                    copy_rgb_frame,
+                    copy_rgb_frame_uint8 if use_uint8 else copy_rgb_frame,
                     dim=(self.screen_width, self.screen_height),
                     inputs=[img, self.screen_width, self.screen_height],
                     outputs=[target_image],
@@ -3213,5 +3353,4 @@ Instances: {len(self._instances)}"""
 if __name__ == "__main__":
-    wp.init()
     renderer = OpenGLRenderer()

warp/sim/integrator_featherstone.py CHANGED Viewed

@@ -1288,6 +1288,7 @@ def dense_solve(
     n: int,
     L_start: int,
     b_start: int,
+    A: wp.array(dtype=float),
     L: wp.array(dtype=float),
     b: wp.array(dtype=float),
     # outputs
@@ -1303,13 +1304,14 @@ def adj_dense_solve(
     n: int,
     L_start: int,
     b_start: int,
+    A: wp.array(dtype=float),
     L: wp.array(dtype=float),
     b: wp.array(dtype=float),
     # outputs
     x: wp.array(dtype=float),
     tmp: wp.array(dtype=float),
 ):
-    if not tmp or not wp.adjoint[x] or not wp.adjoint[L]:
+    if not tmp or not wp.adjoint[x] or not wp.adjoint[A] or not wp.adjoint[L]:
         return
     for i in range(n):
         tmp[b_start + i] = 0.0
@@ -1324,12 +1326,17 @@ def adj_dense_solve(
         for j in range(n):
             wp.adjoint[L][L_start + dense_index(n, i, j)] += -tmp[b_start + i] * x[b_start + j]
+    for i in range(n):
+        for j in range(n):
+            wp.adjoint[A][L_start + dense_index(n, i, j)] += -tmp[b_start + i] * x[b_start + j]
 @wp.kernel
 def eval_dense_solve_batched(
     L_start: wp.array(dtype=int),
     L_dim: wp.array(dtype=int),
     b_start: wp.array(dtype=int),
+    A: wp.array(dtype=float),
     L: wp.array(dtype=float),
     b: wp.array(dtype=float),
     # outputs
@@ -1338,7 +1345,7 @@ def eval_dense_solve_batched(
 ):
     batch = wp.tid()
-    dense_solve(L_dim[batch], L_start[batch], b_start[batch], L, b, x, tmp)
+    dense_solve(L_dim[batch], L_start[batch], b_start[batch], A, L, b, x, tmp)
 @wp.kernel
@@ -1509,7 +1516,6 @@ class FeatherstoneIntegrator(Integrator):
             self.L = wp.zeros_like(self.H)
         if model.body_count:
-            # TODO use requires_grad here?
             self.body_I_m = wp.empty(
                 (model.body_count,), dtype=wp.spatial_matrix, device=model.device, requires_grad=model.requires_grad
             )
@@ -1859,6 +1865,7 @@ class FeatherstoneIntegrator(Integrator):
                             self.articulation_H_start,
                             self.articulation_H_rows,
                             self.articulation_dof_start,
+                            self.H,
                             self.L,
                             state_aug.joint_tau,
                         ],