PyPI - warp-lang - Versions diffs - 1.5.1__py3-none-manylinux2014_aarch64.whl → 1.6.1__py3-none-manylinux2014_aarch64.whl - Mend

warp-lang 1.5.1__py3-none-manylinux2014_aarch64.whl → 1.6.1__py3-none-manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (131) hide show

warp/__init__.py +5 -0
warp/autograd.py +414 -191
warp/bin/warp-clang.so +0 -0
warp/bin/warp.so +0 -0
warp/build.py +40 -12
warp/build_dll.py +13 -6
warp/builtins.py +1077 -481
warp/codegen.py +250 -122
warp/config.py +65 -21
warp/context.py +500 -149
warp/examples/assets/square_cloth.usd +0 -0
warp/examples/benchmarks/benchmark_gemm.py +27 -18
warp/examples/benchmarks/benchmark_interop_paddle.py +3 -3
warp/examples/benchmarks/benchmark_interop_torch.py +3 -3
warp/examples/core/example_marching_cubes.py +1 -1
warp/examples/core/example_mesh.py +1 -1
warp/examples/core/example_torch.py +18 -34
warp/examples/core/example_wave.py +1 -1
warp/examples/fem/example_apic_fluid.py +1 -0
warp/examples/fem/example_mixed_elasticity.py +1 -1
warp/examples/optim/example_bounce.py +1 -1
warp/examples/optim/example_cloth_throw.py +1 -1
warp/examples/optim/example_diffray.py +4 -15
warp/examples/optim/example_drone.py +1 -1
warp/examples/optim/example_softbody_properties.py +392 -0
warp/examples/optim/example_trajectory.py +1 -3
warp/examples/optim/example_walker.py +5 -0
warp/examples/sim/example_cartpole.py +0 -2
warp/examples/sim/example_cloth_self_contact.py +314 -0
warp/examples/sim/example_granular_collision_sdf.py +4 -5
warp/examples/sim/example_jacobian_ik.py +0 -2
warp/examples/sim/example_quadruped.py +5 -2
warp/examples/tile/example_tile_cholesky.py +79 -0
warp/examples/tile/example_tile_convolution.py +2 -2
warp/examples/tile/example_tile_fft.py +2 -2
warp/examples/tile/example_tile_filtering.py +3 -3
warp/examples/tile/example_tile_matmul.py +4 -4
warp/examples/tile/example_tile_mlp.py +12 -12
warp/examples/tile/example_tile_nbody.py +191 -0
warp/examples/tile/example_tile_walker.py +319 -0
warp/math.py +147 -0
warp/native/array.h +12 -0
warp/native/builtin.h +0 -1
warp/native/bvh.cpp +149 -70
warp/native/bvh.cu +287 -68
warp/native/bvh.h +195 -85
warp/native/clang/clang.cpp +6 -2
warp/native/crt.h +1 -0
warp/native/cuda_util.cpp +35 -0
warp/native/cuda_util.h +5 -0
warp/native/exports.h +40 -40
warp/native/intersect.h +17 -0
warp/native/mat.h +57 -3
warp/native/mathdx.cpp +19 -0
warp/native/mesh.cpp +25 -8
warp/native/mesh.cu +153 -101
warp/native/mesh.h +482 -403
warp/native/quat.h +40 -0
warp/native/solid_angle.h +7 -0
warp/native/sort.cpp +85 -0
warp/native/sort.cu +34 -0
warp/native/sort.h +3 -1
warp/native/spatial.h +11 -0
warp/native/tile.h +1189 -664
warp/native/tile_reduce.h +8 -6
warp/native/vec.h +41 -0
warp/native/warp.cpp +8 -1
warp/native/warp.cu +263 -40
warp/native/warp.h +19 -5
warp/optim/linear.py +22 -4
warp/render/render_opengl.py +132 -59
warp/render/render_usd.py +10 -2
warp/sim/__init__.py +6 -1
warp/sim/collide.py +289 -32
warp/sim/import_urdf.py +20 -5
warp/sim/integrator_euler.py +25 -7
warp/sim/integrator_featherstone.py +147 -35
warp/sim/integrator_vbd.py +842 -40
warp/sim/model.py +173 -112
warp/sim/render.py +2 -2
warp/stubs.py +249 -116
warp/tape.py +28 -30
warp/tests/aux_test_module_unload.py +15 -0
warp/tests/{test_sim_grad.py → flaky_test_sim_grad.py} +104 -63
warp/tests/test_array.py +100 -0
warp/tests/test_assert.py +242 -0
warp/tests/test_codegen.py +14 -61
warp/tests/test_collision.py +8 -8
warp/tests/test_examples.py +16 -1
warp/tests/test_grad_debug.py +87 -2
warp/tests/test_hash_grid.py +1 -1
warp/tests/test_ipc.py +116 -0
warp/tests/test_launch.py +77 -26
warp/tests/test_mat.py +213 -168
warp/tests/test_math.py +47 -1
warp/tests/test_matmul.py +11 -7
warp/tests/test_matmul_lite.py +4 -4
warp/tests/test_mesh.py +84 -60
warp/tests/test_mesh_query_aabb.py +165 -0
warp/tests/test_mesh_query_point.py +328 -286
warp/tests/test_mesh_query_ray.py +134 -121
warp/tests/test_mlp.py +2 -2
warp/tests/test_operators.py +43 -0
warp/tests/test_overwrite.py +6 -5
warp/tests/test_quat.py +77 -0
warp/tests/test_reload.py +29 -0
warp/tests/test_sim_grad_bounce_linear.py +204 -0
warp/tests/test_static.py +16 -0
warp/tests/test_tape.py +25 -0
warp/tests/test_tile.py +134 -191
warp/tests/test_tile_load.py +399 -0
warp/tests/test_tile_mathdx.py +61 -8
warp/tests/test_tile_mlp.py +17 -17
warp/tests/test_tile_reduce.py +24 -18
warp/tests/test_tile_shared_memory.py +66 -17
warp/tests/test_tile_view.py +165 -0
warp/tests/test_torch.py +35 -0
warp/tests/test_utils.py +36 -24
warp/tests/test_vec.py +110 -0
warp/tests/unittest_suites.py +29 -4
warp/tests/unittest_utils.py +30 -11
warp/thirdparty/unittest_parallel.py +5 -2
warp/types.py +419 -111
warp/utils.py +9 -5
{warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/METADATA +86 -45
{warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/RECORD +129 -118
{warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/WHEEL +1 -1
warp/examples/benchmarks/benchmark_tile.py +0 -179
warp/native/tile_gemm.h +0 -341
{warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/LICENSE.md +0 -0
{warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/top_level.txt +0 -0

warp/native/quat.h CHANGED Viewed

@@ -487,6 +487,37 @@ inline CUDA_CALLABLE void adj_indexref(quat_t<Type>* q, int idx,
     // nop
 }
+template<typename Type>
+inline CUDA_CALLABLE void augassign_add(quat_t<Type>& q, int idx, Type value)  // in-place `q[idx] += value` on a single quaternion component
+{
+    q[idx] += value;  // idx is used as-is; no range check happens in this function
+}
+template<typename Type>
+inline CUDA_CALLABLE void adj_augassign_add(quat_t<Type>& q, int idx, Type value,  // adj_-prefixed (reverse-mode) counterpart of augassign_add
+                                        quat_t<Type>& adj_q, int adj_idx, Type& adj_value)  // q, value and adj_idx are not read by the body
+{
+    adj_value += adj_q[idx];  // the gradient stored for component idx is accumulated into value's adjoint
+}
+template<typename Type>
+inline CUDA_CALLABLE void augassign_sub(quat_t<Type>& q, int idx, Type value)  // in-place `q[idx] -= value` on a single quaternion component
+{
+    q[idx] -= value;  // idx is used as-is; no range check happens in this function
+}
+template<typename Type>
+inline CUDA_CALLABLE void adj_augassign_sub(quat_t<Type>& q, int idx, Type value,  // adj_-prefixed (reverse-mode) counterpart of augassign_sub
+                                        quat_t<Type>& adj_q, int adj_idx, Type& adj_value)  // q, value and adj_idx are not read by the body
+{
+    adj_value -= adj_q[idx];  // sign is negative because the forward op subtracts value
+}
 template<typename Type>
 inline CUDA_CALLABLE quat_t<Type> assign(quat_t<Type>& q, int idx, Type value)
 {
@@ -1229,6 +1260,15 @@ inline CUDA_CALLABLE quat_t<Type> quat_identity()
     return quat_t<Type>(Type(0), Type(0), Type(0), Type(1));
 }
+template<typename Type>
+CUDA_CALLABLE inline int len(const quat_t<Type>& x)  // length of a quaternion; x itself is never read
+{
+    return 4;  // constant: always four, independent of Type
+}
+template<typename Type>
+CUDA_CALLABLE inline void adj_len(const quat_t<Type>& x, quat_t<Type>& adj_x, const int& adj_ret)  // adj_-prefixed counterpart of len(); intentionally empty
+{  // len() returns a constant, so nothing is accumulated into adj_x
+}
 } // namespace wp

warp/native/solid_angle.h CHANGED Viewed

@@ -357,6 +357,13 @@ CUDA_CALLABLE inline void combine_precomputed_solid_angle_props(SolidAngleProps
 	my_data.max_p_dist_sq = length_sq(max(my_data.average_p - my_data.box.lower, my_data.box.upper - my_data.average_p));
 }
+CUDA_CALLABLE inline SolidAngleProps combine_precomputed_solid_angle_props(const SolidAngleProps* left_child_data, const SolidAngleProps* right_child_data)  // by-value overload: returns the combined props of the two children
+{
+	SolidAngleProps my_data;  // local result object, filled in by the call below
+	combine_precomputed_solid_angle_props(my_data, left_child_data, right_child_data);  // delegates to the three-argument overload that writes into my_data
+	return my_data;
+}
 // Return whether need to
 CUDA_CALLABLE inline bool evaluate_node_solid_angle(const vec3 &query_point, SolidAngleProps *current_data, float &solid_angle, const float accuracy_scale_sq)
 {

warp/native/sort.cpp CHANGED Viewed

@@ -77,12 +77,90 @@ void radix_sort_pairs_host(int* keys, int* values, int n)
 	}
 }
+ // Bit-trick reference: http://stereopsis.com/radix.html
+inline unsigned int radix_float_to_int(float f)  // turns the bit pattern of f into an unsigned key for the radix passes
+{
+	unsigned int i = reinterpret_cast<unsigned int&>(f);  // raw 32 bits of f; NOTE(review): type-punning through a reference cast - consider memcpy to avoid aliasing issues
+	unsigned int mask = (unsigned int)(-(int)(i >> 31)) | 0x80000000;  // sign bit set -> 0xFFFFFFFF (flip every bit); sign bit clear -> 0x80000000 (flip only the sign bit)
+	return i ^ mask;  // apply the flip selected above
+}
+void radix_sort_pairs_host(float* keys, int* values, int n)  // host sort of n (float key, int value) pairs, ascending by radix_float_to_int(key), in two 16-bit passes
+{
+	static unsigned int tables[2][1 << 16];  // tables[0]: counts for the low 16 key bits, tables[1]: high 16 bits; static, so one instance is shared by all calls - NOTE(review): concurrent callers would race on it
+	memset(tables, 0, sizeof(tables));  // clear whatever the previous call left behind
+	float* auxKeys = keys + n;  // scratch space is the second half of the same array: keys must have room for 2*n floats
+	int* auxValues = values + n;  // likewise values must have room for 2*n ints
+	// build histograms
+	for (int i=0; i < n; ++i)
+	{
+		const unsigned int k = radix_float_to_int(keys[i]);
+		const unsigned short low = k & 0xffff;
+		const unsigned short high = k >> 16;
+		++tables[0][low];  // both histograms are filled in this single sweep
+		++tables[1][high];
+	}
+	// convert histograms to offset tables in-place
+	unsigned int offlow = 0;
+	unsigned int offhigh = 0;
+	for (int i=0; i < 65536; ++i)
+	{
+		const unsigned int newofflow = offlow + tables[0][i];
+		const unsigned int newoffhigh = offhigh + tables[1][i];
+		tables[0][i] = offlow;  // exclusive prefix sum: first output slot for bucket i
+		tables[1][i] = offhigh;
+		offlow = newofflow;
+		offhigh = newoffhigh;
+	}
+	// pass 1 - sort by low 16 bits
+	for (int i=0; i < n; ++i)
+	{
+		// lookup offset of input
+		const float f = keys[i];
+		const unsigned int k = radix_float_to_int(f);
+		const int v = values[i];
+		const unsigned int b = k & 0xffff;
+		// find offset and increment
+		const unsigned int offset = tables[0][b]++;  // post-increment keeps equal buckets in input order
+		auxKeys[offset] = f;  // the original float is stored, not the transformed key
+		auxValues[offset] = v;
+	}
+	// pass 2 - sort by high 16 bits
+	for (int i=0; i < n; ++i)
+	{
+		// lookup offset of input
+		const float f = auxKeys[i];
+		const unsigned int k = radix_float_to_int(f);
+		const int v = auxValues[i];
+		const unsigned int b = k >> 16;
+		const unsigned int offset = tables[1][b]++;
+		keys[offset] = f;  // final result is written back to the first n slots
+		values[offset] = v;
+	}
+}
 #if !WP_ENABLE_CUDA  // the definitions below are compiled only when WP_ENABLE_CUDA evaluates to 0
 void radix_sort_reserve(void* context, int n, void** mem_out, size_t* size_out) {}  // empty body: mem_out / size_out are left untouched
 void radix_sort_pairs_int_device(uint64_t keys, uint64_t values, int n) {}  // empty body: no sorting is performed
+void radix_sort_pairs_float_device(uint64_t keys, uint64_t values, int n) {}  // empty body: float-key variant added alongside the int one
 #endif // !WP_ENABLE_CUDA
@@ -92,3 +170,10 @@ void radix_sort_pairs_int_host(uint64_t keys, uint64_t values, int n)
         reinterpret_cast<int *>(keys),
         reinterpret_cast<int *>(values), n);
 }
+void radix_sort_pairs_float_host(uint64_t keys, uint64_t values, int n)  // integer-address wrapper around the float-key host sort
+{
+    radix_sort_pairs_host(
+        reinterpret_cast<float *>(keys),  // the 64-bit integers are treated as raw pointer values
+        reinterpret_cast<int *>(values), n);
+}

warp/native/sort.cu CHANGED Viewed

@@ -95,3 +95,37 @@ void radix_sort_pairs_int_device(uint64_t keys, uint64_t values, int n)
         reinterpret_cast<int *>(keys),
         reinterpret_cast<int *>(values), n);
 }
+void radix_sort_pairs_device(void* context, float* keys, int* values, int n)  // device sort of n (float key, int value) pairs via cub::DeviceRadixSort
+{
+    ContextGuard guard(context);  // scoped guard built from `context`; NOTE(review): presumably makes it the current CUDA context for this scope - confirm
+    cub::DoubleBuffer<float> d_keys(keys, keys + n);  // alternate buffer is keys[n..2n): keys must have room for 2*n floats
+	cub::DoubleBuffer<int> d_values(values, values + n);  // likewise values must have room for 2*n ints
+    RadixSortTemp temp;
+    radix_sort_reserve(WP_CURRENT_CONTEXT, n, &temp.mem, &temp.size);  // fills temp.mem / temp.size, which are handed to CUB as temporary storage below
+    // sort
+    check_cuda(cub::DeviceRadixSort::SortPairs(
+        temp.mem,
+        temp.size,
+        d_keys,
+        d_values,
+        n, 0, 32,  // n items, key bits [0, 32), i.e. the whole 32-bit key
+        (cudaStream_t)cuda_stream_get_current()));
+	if (d_keys.Current() != keys)  // sorted keys ended up in the alternate half: copy them back to the front
+		memcpy_d2d(WP_CURRENT_CONTEXT, keys, d_keys.Current(), sizeof(float)*n);
+	if (d_values.Current() != values)  // same check for the values
+		memcpy_d2d(WP_CURRENT_CONTEXT, values, d_values.Current(), sizeof(int)*n);
+}
+void radix_sort_pairs_float_device(uint64_t keys, uint64_t values, int n)  // integer-address wrapper around the float-key device sort
+{
+    radix_sort_pairs_device(
+        WP_CURRENT_CONTEXT,  // no explicit context parameter here; WP_CURRENT_CONTEXT is forwarded
+        reinterpret_cast<float *>(keys),  // the 64-bit integers are treated as raw pointer values
+        reinterpret_cast<int *>(values), n);
+}

warp/native/sort.h CHANGED Viewed

@@ -12,4 +12,6 @@
 void radix_sort_reserve(void* context, int n, void** mem_out=NULL, size_t* size_out=NULL);
 void radix_sort_pairs_host(int* keys, int* values, int n);
-void radix_sort_pairs_device(void* context, int* keys, int* values, int n);
+void radix_sort_pairs_host(float* keys, int* values, int n);  // new overload: float keys, host
+void radix_sort_pairs_device(void* context, int* keys, int* values, int n);
+void radix_sort_pairs_device(void* context, float* keys, int* values, int n);  // new overload: float keys, device

warp/native/spatial.h CHANGED Viewed

@@ -400,6 +400,17 @@ CUDA_CALLABLE inline void adj_lerp(const transform_t<Type>& a, const transform_t
     adj_t += tensordot(b, adj_ret) - tensordot(a, adj_ret);
 }
+template<typename Type>
+CUDA_CALLABLE inline int len(const transform_t<Type>& t)  // length of a transform; t itself is never read
+{
+    return 7;  // constant: always seven, independent of Type
+}
+template<typename Type>
+CUDA_CALLABLE inline void adj_len(const transform_t<Type>& t, transform_t<Type>& adj_t, const int& adj_ret)  // adj_-prefixed counterpart of len(); intentionally empty
+{  // len() returns a constant, so nothing is accumulated into adj_t
+}
 template<typename Type>
 using spatial_matrix_t = mat_t<6,6,Type>;