warp-lang 1.0.0b2__py3-none-win_amd64.whl → 1.0.0b6__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of warp-lang might be problematic.
- docs/conf.py +17 -5
- examples/env/env_ant.py +1 -1
- examples/env/env_cartpole.py +1 -1
- examples/env/env_humanoid.py +1 -1
- examples/env/env_usd.py +4 -1
- examples/env/environment.py +8 -9
- examples/example_dem.py +34 -33
- examples/example_diffray.py +364 -337
- examples/example_fluid.py +32 -23
- examples/example_jacobian_ik.py +97 -93
- examples/example_marching_cubes.py +6 -16
- examples/example_mesh.py +6 -16
- examples/example_mesh_intersect.py +16 -14
- examples/example_nvdb.py +14 -16
- examples/example_raycast.py +14 -13
- examples/example_raymarch.py +16 -23
- examples/example_render_opengl.py +19 -10
- examples/example_sim_cartpole.py +82 -78
- examples/example_sim_cloth.py +45 -48
- examples/example_sim_fk_grad.py +51 -44
- examples/example_sim_fk_grad_torch.py +47 -40
- examples/example_sim_grad_bounce.py +108 -133
- examples/example_sim_grad_cloth.py +99 -113
- examples/example_sim_granular.py +5 -6
- examples/{example_sim_sdf_shape.py → example_sim_granular_collision_sdf.py} +37 -26
- examples/example_sim_neo_hookean.py +51 -55
- examples/example_sim_particle_chain.py +4 -4
- examples/example_sim_quadruped.py +126 -81
- examples/example_sim_rigid_chain.py +54 -61
- examples/example_sim_rigid_contact.py +66 -70
- examples/example_sim_rigid_fem.py +3 -3
- examples/example_sim_rigid_force.py +1 -1
- examples/example_sim_rigid_gyroscopic.py +3 -4
- examples/example_sim_rigid_kinematics.py +28 -39
- examples/example_sim_trajopt.py +112 -110
- examples/example_sph.py +9 -8
- examples/example_wave.py +7 -7
- examples/fem/bsr_utils.py +30 -17
- examples/fem/example_apic_fluid.py +85 -69
- examples/fem/example_convection_diffusion.py +97 -93
- examples/fem/example_convection_diffusion_dg.py +142 -149
- examples/fem/example_convection_diffusion_dg0.py +141 -136
- examples/fem/example_deformed_geometry.py +146 -0
- examples/fem/example_diffusion.py +115 -84
- examples/fem/example_diffusion_3d.py +116 -86
- examples/fem/example_diffusion_mgpu.py +102 -79
- examples/fem/example_mixed_elasticity.py +139 -100
- examples/fem/example_navier_stokes.py +175 -162
- examples/fem/example_stokes.py +143 -111
- examples/fem/example_stokes_transfer.py +186 -157
- examples/fem/mesh_utils.py +59 -97
- examples/fem/plot_utils.py +138 -17
- tools/ci/publishing/build_nodes_info.py +54 -0
- warp/__init__.py +4 -3
- warp/__init__.pyi +1 -0
- warp/bin/warp-clang.dll +0 -0
- warp/bin/warp.dll +0 -0
- warp/build.py +5 -3
- warp/build_dll.py +29 -9
- warp/builtins.py +836 -492
- warp/codegen.py +864 -553
- warp/config.py +3 -1
- warp/context.py +389 -172
- warp/fem/__init__.py +24 -6
- warp/fem/cache.py +318 -25
- warp/fem/dirichlet.py +7 -3
- warp/fem/domain.py +14 -0
- warp/fem/field/__init__.py +30 -38
- warp/fem/field/field.py +149 -0
- warp/fem/field/nodal_field.py +244 -138
- warp/fem/field/restriction.py +8 -6
- warp/fem/field/test.py +127 -59
- warp/fem/field/trial.py +117 -60
- warp/fem/geometry/__init__.py +5 -1
- warp/fem/geometry/deformed_geometry.py +271 -0
- warp/fem/geometry/element.py +24 -1
- warp/fem/geometry/geometry.py +86 -14
- warp/fem/geometry/grid_2d.py +112 -54
- warp/fem/geometry/grid_3d.py +134 -65
- warp/fem/geometry/hexmesh.py +953 -0
- warp/fem/geometry/partition.py +85 -33
- warp/fem/geometry/quadmesh_2d.py +532 -0
- warp/fem/geometry/tetmesh.py +451 -115
- warp/fem/geometry/trimesh_2d.py +197 -92
- warp/fem/integrate.py +534 -268
- warp/fem/operator.py +58 -31
- warp/fem/polynomial.py +11 -0
- warp/fem/quadrature/__init__.py +1 -1
- warp/fem/quadrature/pic_quadrature.py +150 -58
- warp/fem/quadrature/quadrature.py +209 -57
- warp/fem/space/__init__.py +230 -53
- warp/fem/space/basis_space.py +489 -0
- warp/fem/space/collocated_function_space.py +105 -0
- warp/fem/space/dof_mapper.py +49 -2
- warp/fem/space/function_space.py +90 -39
- warp/fem/space/grid_2d_function_space.py +149 -496
- warp/fem/space/grid_3d_function_space.py +173 -538
- warp/fem/space/hexmesh_function_space.py +352 -0
- warp/fem/space/partition.py +129 -76
- warp/fem/space/quadmesh_2d_function_space.py +369 -0
- warp/fem/space/restriction.py +46 -34
- warp/fem/space/shape/__init__.py +15 -0
- warp/fem/space/shape/cube_shape_function.py +738 -0
- warp/fem/space/shape/shape_function.py +103 -0
- warp/fem/space/shape/square_shape_function.py +611 -0
- warp/fem/space/shape/tet_shape_function.py +567 -0
- warp/fem/space/shape/triangle_shape_function.py +429 -0
- warp/fem/space/tetmesh_function_space.py +132 -1039
- warp/fem/space/topology.py +295 -0
- warp/fem/space/trimesh_2d_function_space.py +104 -742
- warp/fem/types.py +13 -11
- warp/fem/utils.py +335 -60
- warp/native/array.h +120 -34
- warp/native/builtin.h +101 -72
- warp/native/bvh.cpp +73 -325
- warp/native/bvh.cu +406 -23
- warp/native/bvh.h +22 -40
- warp/native/clang/clang.cpp +1 -0
- warp/native/crt.h +2 -0
- warp/native/cuda_util.cpp +8 -3
- warp/native/cuda_util.h +1 -0
- warp/native/exports.h +1522 -1243
- warp/native/intersect.h +19 -4
- warp/native/intersect_adj.h +8 -8
- warp/native/mat.h +76 -17
- warp/native/mesh.cpp +33 -108
- warp/native/mesh.cu +114 -18
- warp/native/mesh.h +395 -40
- warp/native/noise.h +272 -329
- warp/native/quat.h +51 -8
- warp/native/rand.h +44 -34
- warp/native/reduce.cpp +1 -1
- warp/native/sparse.cpp +4 -4
- warp/native/sparse.cu +163 -155
- warp/native/spatial.h +2 -2
- warp/native/temp_buffer.h +18 -14
- warp/native/vec.h +103 -21
- warp/native/warp.cpp +2 -1
- warp/native/warp.cu +28 -3
- warp/native/warp.h +4 -3
- warp/render/render_opengl.py +261 -109
- warp/sim/__init__.py +1 -2
- warp/sim/articulation.py +385 -185
- warp/sim/import_mjcf.py +59 -48
- warp/sim/import_urdf.py +15 -15
- warp/sim/import_usd.py +174 -102
- warp/sim/inertia.py +17 -18
- warp/sim/integrator_xpbd.py +4 -3
- warp/sim/model.py +330 -250
- warp/sim/render.py +1 -1
- warp/sparse.py +625 -152
- warp/stubs.py +341 -309
- warp/tape.py +9 -6
- warp/tests/__main__.py +3 -6
- warp/tests/assets/curlnoise_golden.npy +0 -0
- warp/tests/assets/pnoise_golden.npy +0 -0
- warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
- warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
- warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
- warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
- warp/tests/aux_test_unresolved_func.py +14 -0
- warp/tests/aux_test_unresolved_symbol.py +14 -0
- warp/tests/disabled_kinematics.py +239 -0
- warp/tests/run_coverage_serial.py +31 -0
- warp/tests/test_adam.py +103 -106
- warp/tests/test_arithmetic.py +94 -74
- warp/tests/test_array.py +82 -101
- warp/tests/test_array_reduce.py +57 -23
- warp/tests/test_atomic.py +64 -28
- warp/tests/test_bool.py +22 -12
- warp/tests/test_builtins_resolution.py +1292 -0
- warp/tests/test_bvh.py +18 -18
- warp/tests/test_closest_point_edge_edge.py +54 -57
- warp/tests/test_codegen.py +165 -134
- warp/tests/test_compile_consts.py +28 -20
- warp/tests/test_conditional.py +108 -24
- warp/tests/test_copy.py +10 -12
- warp/tests/test_ctypes.py +112 -88
- warp/tests/test_dense.py +21 -14
- warp/tests/test_devices.py +98 -0
- warp/tests/test_dlpack.py +75 -75
- warp/tests/test_examples.py +237 -0
- warp/tests/test_fabricarray.py +22 -24
- warp/tests/test_fast_math.py +15 -11
- warp/tests/test_fem.py +1034 -124
- warp/tests/test_fp16.py +23 -16
- warp/tests/test_func.py +187 -86
- warp/tests/test_generics.py +194 -49
- warp/tests/test_grad.py +123 -181
- warp/tests/test_grad_customs.py +176 -0
- warp/tests/test_hash_grid.py +35 -34
- warp/tests/test_import.py +10 -23
- warp/tests/test_indexedarray.py +24 -25
- warp/tests/test_intersect.py +18 -9
- warp/tests/test_large.py +141 -0
- warp/tests/test_launch.py +14 -41
- warp/tests/test_lerp.py +64 -65
- warp/tests/test_lvalue.py +493 -0
- warp/tests/test_marching_cubes.py +12 -13
- warp/tests/test_mat.py +517 -2898
- warp/tests/test_mat_lite.py +115 -0
- warp/tests/test_mat_scalar_ops.py +2889 -0
- warp/tests/test_math.py +103 -9
- warp/tests/test_matmul.py +304 -69
- warp/tests/test_matmul_lite.py +410 -0
- warp/tests/test_mesh.py +60 -22
- warp/tests/test_mesh_query_aabb.py +21 -25
- warp/tests/test_mesh_query_point.py +111 -22
- warp/tests/test_mesh_query_ray.py +12 -24
- warp/tests/test_mlp.py +30 -22
- warp/tests/test_model.py +92 -89
- warp/tests/test_modules_lite.py +39 -0
- warp/tests/test_multigpu.py +88 -114
- warp/tests/test_noise.py +12 -11
- warp/tests/test_operators.py +16 -20
- warp/tests/test_options.py +11 -11
- warp/tests/test_pinned.py +17 -18
- warp/tests/test_print.py +32 -11
- warp/tests/test_quat.py +275 -129
- warp/tests/test_rand.py +18 -16
- warp/tests/test_reload.py +38 -34
- warp/tests/test_rounding.py +50 -43
- warp/tests/test_runlength_encode.py +168 -20
- warp/tests/test_smoothstep.py +9 -11
- warp/tests/test_snippet.py +143 -0
- warp/tests/test_sparse.py +261 -63
- warp/tests/test_spatial.py +276 -243
- warp/tests/test_streams.py +110 -85
- warp/tests/test_struct.py +268 -63
- warp/tests/test_tape.py +39 -21
- warp/tests/test_torch.py +90 -86
- warp/tests/test_transient_module.py +10 -12
- warp/tests/test_types.py +363 -0
- warp/tests/test_utils.py +451 -0
- warp/tests/test_vec.py +354 -2050
- warp/tests/test_vec_lite.py +73 -0
- warp/tests/test_vec_scalar_ops.py +2099 -0
- warp/tests/test_volume.py +418 -376
- warp/tests/test_volume_write.py +124 -134
- warp/tests/unittest_serial.py +35 -0
- warp/tests/unittest_suites.py +291 -0
- warp/tests/unittest_utils.py +342 -0
- warp/tests/{test_misc.py → unused_test_misc.py} +13 -5
- warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
- warp/thirdparty/appdirs.py +36 -45
- warp/thirdparty/unittest_parallel.py +589 -0
- warp/types.py +622 -211
- warp/utils.py +54 -393
- warp_lang-1.0.0b6.dist-info/METADATA +238 -0
- warp_lang-1.0.0b6.dist-info/RECORD +409 -0
- {warp_lang-1.0.0b2.dist-info → warp_lang-1.0.0b6.dist-info}/WHEEL +1 -1
- examples/example_cache_management.py +0 -40
- examples/example_multigpu.py +0 -54
- examples/example_struct.py +0 -65
- examples/fem/example_stokes_transfer_3d.py +0 -210
- warp/bin/warp-clang.so +0 -0
- warp/bin/warp.so +0 -0
- warp/fem/field/discrete_field.py +0 -80
- warp/fem/space/nodal_function_space.py +0 -233
- warp/tests/test_all.py +0 -223
- warp/tests/test_array_scan.py +0 -60
- warp/tests/test_base.py +0 -208
- warp/tests/test_unresolved_func.py +0 -7
- warp/tests/test_unresolved_symbol.py +0 -7
- warp_lang-1.0.0b2.dist-info/METADATA +0 -26
- warp_lang-1.0.0b2.dist-info/RECORD +0 -380
- /warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
- /warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
- /warp/tests/{test_square.py → aux_test_square.py} +0 -0
- {warp_lang-1.0.0b2.dist-info → warp_lang-1.0.0b6.dist-info}/LICENSE.md +0 -0
- {warp_lang-1.0.0b2.dist-info → warp_lang-1.0.0b6.dist-info}/top_level.txt +0 -0
warp/native/vec.h
CHANGED
@@ -33,7 +33,7 @@ struct vec_t
     {
         for( unsigned i=0; i < Length; ++i )
         {
-            c[i] = other[i];
+            c[i] = static_cast<Type>(other[i]);
         }
     }
 
@@ -284,12 +284,41 @@ inline CUDA_CALLABLE vec_t<2, Type> div(vec_t<2, Type> a, Type s)
     return vec_t<2, Type>(a.c[0]/s,a.c[1]/s);
 }
 
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE vec_t<Length, Type> div(Type s, vec_t<Length, Type> a)
+{
+    vec_t<Length, Type> ret;
+    for (unsigned i=0; i < Length; ++i)
+    {
+        ret[i] = s / a[i];
+    }
+    return ret;
+}
+
+template<typename Type>
+inline CUDA_CALLABLE vec_t<3, Type> div(Type s, vec_t<3, Type> a)
+{
+    return vec_t<3, Type>(s/a.c[0],s/a.c[1],s/a.c[2]);
+}
+
+template<typename Type>
+inline CUDA_CALLABLE vec_t<2, Type> div(Type s, vec_t<2, Type> a)
+{
+    return vec_t<2, Type>(s/a.c[0],s/a.c[1]);
+}
+
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE vec_t<Length, Type> operator / (vec_t<Length, Type> a, Type s)
 {
     return div(a,s);
 }
 
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE vec_t<Length, Type> operator / (Type s, vec_t<Length, Type> a)
+{
+    return div(s, a);
+}
+
 // component wise division
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE vec_t<Length, Type> cw_div(vec_t<Length, Type> a, vec_t<Length, Type> b)
@@ -383,7 +412,7 @@ inline CUDA_CALLABLE Type tensordot(vec_t<Length, Type> a, vec_t<Length, Type> b
 
 
 template<unsigned Length, typename Type>
-inline CUDA_CALLABLE Type index(const vec_t<Length, Type> & a, int idx)
+inline CUDA_CALLABLE Type extract(const vec_t<Length, Type> & a, int idx)
 {
 #ifndef NDEBUG
     if (idx < 0 || idx >= Length)
@@ -397,7 +426,21 @@ inline CUDA_CALLABLE Type index(const vec_t<Length, Type> & a, int idx)
 }
 
 template<unsigned Length, typename Type>
-inline CUDA_CALLABLE void indexset(vec_t<Length, Type>& v, int idx, Type value)
+inline CUDA_CALLABLE Type* index(vec_t<Length, Type>& v, int idx)
+{
+#ifndef NDEBUG
+    if (idx < 0 || idx >= Length)
+    {
+        printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    return &v[idx];
+}
+
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE Type* indexref(vec_t<Length, Type>* v, int idx)
 {
 #ifndef NDEBUG
     if (idx < 0 || idx >= Length)
@@ -407,17 +450,23 @@ inline CUDA_CALLABLE void indexset(vec_t<Length, Type>& v, int idx, Type value)
     }
 #endif
 
-    v[idx] = value;
+    return &((*v)[idx]);
 }
 
 template<unsigned Length, typename Type>
-inline CUDA_CALLABLE void adj_indexset(vec_t<Length, Type>& v, int idx, Type value,
+inline CUDA_CALLABLE void adj_index(vec_t<Length, Type>& v, int idx,
                                        vec_t<Length, Type>& adj_v, int adj_idx, const Type& adj_value)
 {
     // nop
 }
 
 
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_indexref(vec_t<Length, Type>* v, int idx,
+                                       vec_t<Length, Type>& adj_v, int adj_idx, const Type& adj_value)
+{
+    // nop
+}
 
 
 template<unsigned Length, typename Type>
@@ -645,7 +694,7 @@ inline CUDA_CALLABLE void adj_vec_t(const vec_t<Length, OtherType>& other, vec_t
 {
     for( unsigned i=0; i < Length; ++i )
     {
-        adj_other[i] += adj_ret[i];
+        adj_other[i] += static_cast<OtherType>(adj_ret[i]);
     }
 }
 
@@ -715,9 +764,30 @@ inline CUDA_CALLABLE void adj_div(vec_t<Length, Type> a, Type s, vec_t<Length, T
 }
 
 template<unsigned Length, typename Type>
-inline CUDA_CALLABLE void adj_cw_div(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const vec_t<Length, Type>& adj_ret) {
+inline CUDA_CALLABLE void adj_div(Type s, vec_t<Length, Type> a, Type& adj_s, vec_t<Length, Type>& adj_a, const vec_t<Length, Type>& adj_ret)
+{
+
+    adj_s -= dot(a , adj_ret)/ (s * s); // - a / s^2
+
+    for( unsigned i=0; i < Length; ++i )
+    {
+        adj_a[i] += s / adj_ret[i];
+    }
+
+#if FP_CHECK
+    if (!isfinite(a) || !isfinite(s) || !isfinite(adj_a) || !isfinite(adj_s) || !isfinite(adj_ret))
+    {
+        // \TODO: How shall we implement this error message?
+        // printf("adj_div((%f %f %f %f), %f, (%f %f %f %f), %f, (%f %f %f %f)\n", a.x, a.y, a.z, a.w, s, adj_a.x, adj_a.y, adj_a.z, adj_a.w, adj_s, adj_ret.x, adj_ret.y, adj_ret.z, adj_ret.w);
+        assert(0);
+    }
+#endif
+}
+
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_cw_div(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& ret, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const vec_t<Length, Type>& adj_ret) {
     adj_a += cw_div(adj_ret, b);
-    adj_b -= cw_mul(adj_ret, cw_div(cw_div(a, b), b));
+    adj_b -= cw_mul(adj_ret, cw_div(ret, b));
 }
 
 template<unsigned Length, typename Type>
@@ -816,7 +886,7 @@ inline CUDA_CALLABLE void adj_dot(vec_t<3, Type> a, vec_t<3, Type> b, vec_t<3, T
 
 
 template<unsigned Length, typename Type>
-inline CUDA_CALLABLE void adj_index(const vec_t<Length, Type> & a, int idx, vec_t<Length, Type> & adj_a, int & adj_idx, Type & adj_ret)
+inline CUDA_CALLABLE void adj_extract(const vec_t<Length, Type> & a, int idx, vec_t<Length, Type> & adj_a, int & adj_idx, Type & adj_ret)
 {
 #ifndef NDEBUG
     if (idx < 0 || idx > Length)
@@ -830,9 +900,12 @@ inline CUDA_CALLABLE void adj_index(const vec_t<Length, Type> & a, int idx, vec_
 }
 
 template<unsigned Length, typename Type>
-inline CUDA_CALLABLE void adj_length(vec_t<Length, Type> a, vec_t<Length, Type>& adj_a, const Type adj_ret)
+inline CUDA_CALLABLE void adj_length(vec_t<Length, Type> a, Type ret, vec_t<Length, Type>& adj_a, const Type adj_ret)
 {
-    adj_a += normalize(a)*adj_ret;
+    if (ret > Type(kEps))
+    {
+        adj_a += div(a, ret) * adj_ret;
+    }
 
 #if FP_CHECK
     if (!isfinite(adj_a))
@@ -860,7 +933,7 @@ inline CUDA_CALLABLE void adj_length_sq(vec_t<Length, Type> a, vec_t<Length, Typ
 }
 
 template<unsigned Length, typename Type>
-inline CUDA_CALLABLE void adj_normalize(vec_t<Length, Type> a, vec_t<Length, Type>& adj_a, const vec_t<Length, Type>& adj_ret)
+inline CUDA_CALLABLE void adj_normalize(vec_t<Length, Type> a, vec_t<Length, Type>& ret, vec_t<Length, Type>& adj_a, const vec_t<Length, Type>& adj_ret)
 {
     Type d = length(a);
 
@@ -868,9 +941,7 @@ inline CUDA_CALLABLE void adj_normalize(vec_t<Length, Type> a, vec_t<Length, Typ
     {
         Type invd = Type(1.0f)/d;
 
-        vec_t<Length, Type> ahat = normalize(a);
-
-        adj_a += (adj_ret*invd - ahat*(dot(ahat, adj_ret))*invd);
+        adj_a += (adj_ret*invd - ret*(dot(ret, adj_ret))*invd);
 
 #if FP_CHECK
         if (!isfinite(adj_a))
@@ -931,8 +1002,8 @@ inline CUDA_CALLABLE void adj_max(const vec_t<Length,Type> &v, vec_t<Length,Type
 
 // Do I need to specialize these for different lengths?
 template<unsigned Length, typename Type>
-inline CUDA_CALLABLE vec_t<Length, Type> atomic_add(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
-{
+inline CUDA_CALLABLE vec_t<Length, Type> atomic_add(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
+{
     vec_t<Length, Type> ret;
     for( unsigned i=0; i < Length; ++i )
     {
@@ -943,8 +1014,8 @@ inline CUDA_CALLABLE vec_t<Length, Type> atomic_add(vec_t<Length, Type> * addr,
 }
 
 template<unsigned Length, typename Type>
-inline CUDA_CALLABLE vec_t<Length, Type> atomic_min(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
-{
+inline CUDA_CALLABLE vec_t<Length, Type> atomic_min(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
+{
     vec_t<Length, Type> ret;
     for( unsigned i=0; i < Length; ++i )
    {
@@ -955,8 +1026,8 @@ inline CUDA_CALLABLE vec_t<Length, Type> atomic_min(vec_t<Length, Type> * addr,
 }
 
 template<unsigned Length, typename Type>
-inline CUDA_CALLABLE vec_t<Length, Type> atomic_max(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
-{
+inline CUDA_CALLABLE vec_t<Length, Type> atomic_max(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
+{
    vec_t<Length, Type> ret;
    for( unsigned i=0; i < Length; ++i )
    {
@@ -966,6 +1037,17 @@ inline CUDA_CALLABLE vec_t<Length, Type> atomic_max(vec_t<Length, Type> * addr,
     return ret;
 }
 
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_atomic_minmax(
+    vec_t<Length,Type> *addr,
+    vec_t<Length,Type> *adj_addr,
+    const vec_t<Length,Type> &value,
+    vec_t<Length,Type> &adj_value)
+{
+    for (unsigned i=0; i < Length; ++i)
+        adj_atomic_minmax(&(addr->c[i]), &(adj_addr->c[i]), value[i], adj_value[i]);
+}
+
 // ok, the original implementation of this didn't take the absolute values.
 // I wouldn't consider this expected behavior. It looks like it's only
 // being used for bounding boxes at the moment, where this doesn't matter,
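Taken together, the vec.h changes do three things: they add scalar-by-vector division overloads (div(Type, vec_t) plus the matching operator /), they split vector indexing into extract() for reads and index()/indexref() for writable references, and they thread the forward result (ret) into adjoints such as adj_length, adj_normalize, and adj_cw_div so the backward pass reuses it instead of recomputing length(a) or normalize(a); adj_length also gains a kEps guard against zero-length vectors. The atomic_add/atomic_min/atomic_max hunks appear to be whitespace-only. Below is a minimal Python-level sketch of what the new overloads enable inside a Warp kernel; it assumes the standard wp.init/wp.kernel/wp.launch API, and the component write relies on the new lvalue support suggested by the added test_lvalue.py, so treat it as illustrative rather than code from the package:

import warp as wp

wp.init()

@wp.kernel
def reciprocal(v: wp.array(dtype=wp.vec3), out: wp.array(dtype=wp.vec3)):
    tid = wp.tid()
    r = 2.0 / v[tid]  # scalar / vector resolves to the new div(Type, vec_t) overload
    r[2] = 0.0        # component write lowers to the new index()/indexref() reference path
    out[tid] = r

v = wp.array([[1.0, 2.0, 4.0]], dtype=wp.vec3)
out = wp.zeros(1, dtype=wp.vec3)
wp.launch(reciprocal, dim=1, inputs=[v, out])
print(out.numpy())  # expect [[2. 1. 0.]]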
warp/native/warp.cpp
CHANGED
@@ -945,6 +945,7 @@ void array_fill_device(void* context, void* arr, int arr_type, const void* value
 
 WP_API int cuda_driver_version() { return 0; }
 WP_API int cuda_toolkit_version() { return 0; }
+WP_API bool cuda_driver_is_initialized() { return false; }
 
 WP_API int nvrtc_supported_arch_count() { return 0; }
 WP_API void nvrtc_supported_archs(int* archs) {}
@@ -994,7 +995,7 @@ WP_API size_t cuda_compile_program(const char* cuda_src, int arch, const char* i
 WP_API void* cuda_load_module(void* context, const char* ptx) { return NULL; }
 WP_API void cuda_unload_module(void* context, void* module) {}
 WP_API void* cuda_get_kernel(void* context, void* module, const char* name) { return NULL; }
-WP_API size_t cuda_launch_kernel(void* context, void* kernel, size_t dim, void** args) { return 0;}
+WP_API size_t cuda_launch_kernel(void* context, void* kernel, size_t dim, int max_blocks, void** args) { return 0;}
 
 WP_API void cuda_set_context_restore_policy(bool always_restore) {}
 WP_API int cuda_get_context_restore_policy() { return false; }
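These warp.cpp changes keep the CPU-only fallback in sync with the CUDA build: this file provides no-op stubs for every CUDA entry point, so the new cuda_driver_is_initialized() stub reports false and cuda_launch_kernel() picks up the new max_blocks parameter. At the Python level this is what lets device selection degrade gracefully on machines without a CUDA driver; a small sketch, assuming Warp's documented wp.is_cuda_available() helper:

import warp as wp

wp.init()

# on a CPU-only build the stubbed CUDA queries report nothing available,
# so we can fall back to the CPU device explicitly
device = "cuda:0" if wp.is_cuda_available() else "cpu"
a = wp.zeros(1024, dtype=float, device=device)
print(a.device)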
warp/native/warp.cu
CHANGED
@@ -302,7 +302,7 @@ void memset_device(void* context, void* dest, int value, size_t n)
 {
     ContextGuard guard(context);
 
-    if ((n%4) > 0)
+    if (true)// ((n%4) > 0)
     {
         // for unaligned lengths fallback to CUDA memset
         check_cuda(cudaMemsetAsync(dest, value, n, get_current_stream()));
@@ -1141,6 +1141,11 @@ int cuda_toolkit_version()
     return CUDA_VERSION;
 }
 
+bool cuda_driver_is_initialized()
+{
+    return is_cuda_driver_initialized();
+}
+
 int nvrtc_supported_arch_count()
 {
     int count;
@@ -1841,14 +1846,34 @@ void* cuda_get_kernel(void* context, void* module, const char* name)
     return kernel;
 }
 
-size_t cuda_launch_kernel(void* context, void* kernel, size_t dim, void** args)
+size_t cuda_launch_kernel(void* context, void* kernel, size_t dim, int max_blocks, void** args)
 {
     ContextGuard guard(context);
 
     const int block_dim = 256;
     // CUDA specs up to compute capability 9.0 says the max x-dim grid is 2**31-1, so
     // grid_dim is fine as an int for the near future
-    const int grid_dim = (dim + block_dim - 1)/block_dim;
+    int grid_dim = (dim + block_dim - 1)/block_dim;
+
+    if (max_blocks <= 0) {
+        max_blocks = 2147483647;
+    }
+
+    if (grid_dim < 0)
+    {
+#if defined(_DEBUG)
+        fprintf(stderr, "Warp warning: Overflow in grid dimensions detected for %zu total elements and 256 threads "
+                        "per block.\n Setting block count to %d.\n", dim, max_blocks);
+#endif
+        grid_dim = max_blocks;
+    }
+    else
+    {
+        if (grid_dim > max_blocks)
+        {
+            grid_dim = max_blocks;
+        }
+    }
 
     CUresult res = cuLaunchKernel_f(
         (CUfunction)kernel,
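The new launch path computes grid_dim = ceil(dim / 256) and then clamps it: max_blocks of zero or less means "no limit" (it is replaced by 2^31 - 1), a negative grid_dim signals int overflow and is clamped to max_blocks with a debug-build warning, and otherwise grid_dim is simply capped at max_blocks. For example, dim = 1,000,000 with 256 threads per block yields 3,907 blocks. The sketch below assumes this limit is surfaced as a max_blocks keyword on wp.launch, matching the context.py changes listed in this release; the kernel itself is illustrative:

import warp as wp

wp.init()

@wp.kernel
def scale(a: wp.array(dtype=float), s: float):
    tid = wp.tid()
    a[tid] = a[tid] * s

a = wp.zeros(1_000_000, dtype=float)
# max_blocks=0 leaves the grid unclamped: ceil(1_000_000 / 256) = 3907 blocks on CUDA;
# a positive value would cap the block count instead (assumed keyword, see context.py)
wp.launch(scale, dim=a.shape[0], inputs=[a, 2.0], max_blocks=0)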
warp/native/warp.h
CHANGED
@@ -54,11 +54,11 @@ extern "C"
     WP_API void memtile_host(void* dest, const void* src, size_t srcsize, size_t n);
     WP_API void memtile_device(void* context, void* dest, const void* src, size_t srcsize, size_t n);
 
-    WP_API uint64_t bvh_create_host(wp::vec3* lowers, wp::vec3* uppers, int
+    WP_API uint64_t bvh_create_host(wp::vec3* lowers, wp::vec3* uppers, int num_items);
     WP_API void bvh_destroy_host(uint64_t id);
     WP_API void bvh_refit_host(uint64_t id);
 
-    WP_API uint64_t bvh_create_device(void* context, wp::vec3* lowers, wp::vec3* uppers, int
+    WP_API uint64_t bvh_create_device(void* context, wp::vec3* lowers, wp::vec3* uppers, int num_items);
     WP_API void bvh_destroy_device(uint64_t id);
     WP_API void bvh_refit_device(uint64_t id);
 
@@ -214,6 +214,7 @@ extern "C"
 
     WP_API int cuda_driver_version(); // CUDA driver version
     WP_API int cuda_toolkit_version(); // CUDA Toolkit version used to build Warp
+    WP_API bool cuda_driver_is_initialized();
 
     WP_API int nvrtc_supported_arch_count();
     WP_API void nvrtc_supported_archs(int* archs);
@@ -267,7 +268,7 @@ extern "C"
     WP_API void* cuda_load_module(void* context, const char* ptx);
     WP_API void cuda_unload_module(void* context, void* module);
     WP_API void* cuda_get_kernel(void* context, void* module, const char* name);
-    WP_API size_t cuda_launch_kernel(void* context, void* kernel, size_t dim, void** args);
+    WP_API size_t cuda_launch_kernel(void* context, void* kernel, size_t dim, int max_blocks, void** args);
 
     WP_API void cuda_set_context_restore_policy(bool always_restore);
     WP_API int cuda_get_context_restore_policy();
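warp.h is the C API surface loaded by the Python bindings, so these are the declaration-side counterparts of the changes above (the removed bvh_create_* lines are truncated in the upstream diff view, so only the new num_items parameter name is visible). The BVH entry points back the wp.Bvh object; below is a short sketch of the documented query pattern, assuming the usual wp.Bvh, wp.bvh_query_aabb, and wp.bvh_query_next builtins:

import numpy as np
import warp as wp

wp.init()

# one AABB per item: bvh_create_host/_device receive the lowers/uppers arrays
lowers = wp.array(np.array([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0]], dtype=np.float32), dtype=wp.vec3)
uppers = wp.array(np.array([[0.5, 0.5, 0.5], [2.0, 2.0, 2.0]], dtype=np.float32), dtype=wp.vec3)
bvh = wp.Bvh(lowers, uppers)

@wp.kernel
def count_overlaps(bvh_id: wp.uint64, counts: wp.array(dtype=int)):
    tid = wp.tid()
    query = wp.bvh_query_aabb(bvh_id, wp.vec3(0.0, 0.0, 0.0), wp.vec3(1.5, 1.5, 1.5))
    index = int(0)
    while wp.bvh_query_next(query, index):
        counts[tid] = counts[tid] + 1  # 'index' holds the id of the overlapping AABB

counts = wp.zeros(1, dtype=int)
wp.launch(count_overlaps, dim=1, inputs=[bvh.id, counts])
print(counts.numpy())  # expect [2]: both items overlap the query box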