warp-lang 1.0.0b2__py3-none-manylinux2014_x86_64.whl → 1.0.0b6__py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (269)
  1. docs/conf.py +17 -5
  2. examples/env/env_ant.py +1 -1
  3. examples/env/env_cartpole.py +1 -1
  4. examples/env/env_humanoid.py +1 -1
  5. examples/env/env_usd.py +4 -1
  6. examples/env/environment.py +8 -9
  7. examples/example_dem.py +34 -33
  8. examples/example_diffray.py +364 -337
  9. examples/example_fluid.py +32 -23
  10. examples/example_jacobian_ik.py +97 -93
  11. examples/example_marching_cubes.py +6 -16
  12. examples/example_mesh.py +6 -16
  13. examples/example_mesh_intersect.py +16 -14
  14. examples/example_nvdb.py +14 -16
  15. examples/example_raycast.py +14 -13
  16. examples/example_raymarch.py +16 -23
  17. examples/example_render_opengl.py +19 -10
  18. examples/example_sim_cartpole.py +82 -78
  19. examples/example_sim_cloth.py +45 -48
  20. examples/example_sim_fk_grad.py +51 -44
  21. examples/example_sim_fk_grad_torch.py +47 -40
  22. examples/example_sim_grad_bounce.py +108 -133
  23. examples/example_sim_grad_cloth.py +99 -113
  24. examples/example_sim_granular.py +5 -6
  25. examples/{example_sim_sdf_shape.py → example_sim_granular_collision_sdf.py} +37 -26
  26. examples/example_sim_neo_hookean.py +51 -55
  27. examples/example_sim_particle_chain.py +4 -4
  28. examples/example_sim_quadruped.py +126 -81
  29. examples/example_sim_rigid_chain.py +54 -61
  30. examples/example_sim_rigid_contact.py +66 -70
  31. examples/example_sim_rigid_fem.py +3 -3
  32. examples/example_sim_rigid_force.py +1 -1
  33. examples/example_sim_rigid_gyroscopic.py +3 -4
  34. examples/example_sim_rigid_kinematics.py +28 -39
  35. examples/example_sim_trajopt.py +112 -110
  36. examples/example_sph.py +9 -8
  37. examples/example_wave.py +7 -7
  38. examples/fem/bsr_utils.py +30 -17
  39. examples/fem/example_apic_fluid.py +85 -69
  40. examples/fem/example_convection_diffusion.py +97 -93
  41. examples/fem/example_convection_diffusion_dg.py +142 -149
  42. examples/fem/example_convection_diffusion_dg0.py +141 -136
  43. examples/fem/example_deformed_geometry.py +146 -0
  44. examples/fem/example_diffusion.py +115 -84
  45. examples/fem/example_diffusion_3d.py +116 -86
  46. examples/fem/example_diffusion_mgpu.py +102 -79
  47. examples/fem/example_mixed_elasticity.py +139 -100
  48. examples/fem/example_navier_stokes.py +175 -162
  49. examples/fem/example_stokes.py +143 -111
  50. examples/fem/example_stokes_transfer.py +186 -157
  51. examples/fem/mesh_utils.py +59 -97
  52. examples/fem/plot_utils.py +138 -17
  53. tools/ci/publishing/build_nodes_info.py +54 -0
  54. warp/__init__.py +4 -3
  55. warp/__init__.pyi +1 -0
  56. warp/bin/warp-clang.so +0 -0
  57. warp/bin/warp.so +0 -0
  58. warp/build.py +5 -3
  59. warp/build_dll.py +29 -9
  60. warp/builtins.py +836 -492
  61. warp/codegen.py +864 -553
  62. warp/config.py +3 -1
  63. warp/context.py +389 -172
  64. warp/fem/__init__.py +24 -6
  65. warp/fem/cache.py +318 -25
  66. warp/fem/dirichlet.py +7 -3
  67. warp/fem/domain.py +14 -0
  68. warp/fem/field/__init__.py +30 -38
  69. warp/fem/field/field.py +149 -0
  70. warp/fem/field/nodal_field.py +244 -138
  71. warp/fem/field/restriction.py +8 -6
  72. warp/fem/field/test.py +127 -59
  73. warp/fem/field/trial.py +117 -60
  74. warp/fem/geometry/__init__.py +5 -1
  75. warp/fem/geometry/deformed_geometry.py +271 -0
  76. warp/fem/geometry/element.py +24 -1
  77. warp/fem/geometry/geometry.py +86 -14
  78. warp/fem/geometry/grid_2d.py +112 -54
  79. warp/fem/geometry/grid_3d.py +134 -65
  80. warp/fem/geometry/hexmesh.py +953 -0
  81. warp/fem/geometry/partition.py +85 -33
  82. warp/fem/geometry/quadmesh_2d.py +532 -0
  83. warp/fem/geometry/tetmesh.py +451 -115
  84. warp/fem/geometry/trimesh_2d.py +197 -92
  85. warp/fem/integrate.py +534 -268
  86. warp/fem/operator.py +58 -31
  87. warp/fem/polynomial.py +11 -0
  88. warp/fem/quadrature/__init__.py +1 -1
  89. warp/fem/quadrature/pic_quadrature.py +150 -58
  90. warp/fem/quadrature/quadrature.py +209 -57
  91. warp/fem/space/__init__.py +230 -53
  92. warp/fem/space/basis_space.py +489 -0
  93. warp/fem/space/collocated_function_space.py +105 -0
  94. warp/fem/space/dof_mapper.py +49 -2
  95. warp/fem/space/function_space.py +90 -39
  96. warp/fem/space/grid_2d_function_space.py +149 -496
  97. warp/fem/space/grid_3d_function_space.py +173 -538
  98. warp/fem/space/hexmesh_function_space.py +352 -0
  99. warp/fem/space/partition.py +129 -76
  100. warp/fem/space/quadmesh_2d_function_space.py +369 -0
  101. warp/fem/space/restriction.py +46 -34
  102. warp/fem/space/shape/__init__.py +15 -0
  103. warp/fem/space/shape/cube_shape_function.py +738 -0
  104. warp/fem/space/shape/shape_function.py +103 -0
  105. warp/fem/space/shape/square_shape_function.py +611 -0
  106. warp/fem/space/shape/tet_shape_function.py +567 -0
  107. warp/fem/space/shape/triangle_shape_function.py +429 -0
  108. warp/fem/space/tetmesh_function_space.py +132 -1039
  109. warp/fem/space/topology.py +295 -0
  110. warp/fem/space/trimesh_2d_function_space.py +104 -742
  111. warp/fem/types.py +13 -11
  112. warp/fem/utils.py +335 -60
  113. warp/native/array.h +120 -34
  114. warp/native/builtin.h +101 -72
  115. warp/native/bvh.cpp +73 -325
  116. warp/native/bvh.cu +406 -23
  117. warp/native/bvh.h +22 -40
  118. warp/native/clang/clang.cpp +1 -0
  119. warp/native/crt.h +2 -0
  120. warp/native/cuda_util.cpp +8 -3
  121. warp/native/cuda_util.h +1 -0
  122. warp/native/exports.h +1522 -1243
  123. warp/native/intersect.h +19 -4
  124. warp/native/intersect_adj.h +8 -8
  125. warp/native/mat.h +76 -17
  126. warp/native/mesh.cpp +33 -108
  127. warp/native/mesh.cu +114 -18
  128. warp/native/mesh.h +395 -40
  129. warp/native/noise.h +272 -329
  130. warp/native/quat.h +51 -8
  131. warp/native/rand.h +44 -34
  132. warp/native/reduce.cpp +1 -1
  133. warp/native/sparse.cpp +4 -4
  134. warp/native/sparse.cu +163 -155
  135. warp/native/spatial.h +2 -2
  136. warp/native/temp_buffer.h +18 -14
  137. warp/native/vec.h +103 -21
  138. warp/native/warp.cpp +2 -1
  139. warp/native/warp.cu +28 -3
  140. warp/native/warp.h +4 -3
  141. warp/render/render_opengl.py +261 -109
  142. warp/sim/__init__.py +1 -2
  143. warp/sim/articulation.py +385 -185
  144. warp/sim/import_mjcf.py +59 -48
  145. warp/sim/import_urdf.py +15 -15
  146. warp/sim/import_usd.py +174 -102
  147. warp/sim/inertia.py +17 -18
  148. warp/sim/integrator_xpbd.py +4 -3
  149. warp/sim/model.py +330 -250
  150. warp/sim/render.py +1 -1
  151. warp/sparse.py +625 -152
  152. warp/stubs.py +341 -309
  153. warp/tape.py +9 -6
  154. warp/tests/__main__.py +3 -6
  155. warp/tests/assets/curlnoise_golden.npy +0 -0
  156. warp/tests/assets/pnoise_golden.npy +0 -0
  157. warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
  158. warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
  159. warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
  160. warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
  161. warp/tests/aux_test_unresolved_func.py +14 -0
  162. warp/tests/aux_test_unresolved_symbol.py +14 -0
  163. warp/tests/disabled_kinematics.py +239 -0
  164. warp/tests/run_coverage_serial.py +31 -0
  165. warp/tests/test_adam.py +103 -106
  166. warp/tests/test_arithmetic.py +94 -74
  167. warp/tests/test_array.py +82 -101
  168. warp/tests/test_array_reduce.py +57 -23
  169. warp/tests/test_atomic.py +64 -28
  170. warp/tests/test_bool.py +22 -12
  171. warp/tests/test_builtins_resolution.py +1292 -0
  172. warp/tests/test_bvh.py +18 -18
  173. warp/tests/test_closest_point_edge_edge.py +54 -57
  174. warp/tests/test_codegen.py +165 -134
  175. warp/tests/test_compile_consts.py +28 -20
  176. warp/tests/test_conditional.py +108 -24
  177. warp/tests/test_copy.py +10 -12
  178. warp/tests/test_ctypes.py +112 -88
  179. warp/tests/test_dense.py +21 -14
  180. warp/tests/test_devices.py +98 -0
  181. warp/tests/test_dlpack.py +75 -75
  182. warp/tests/test_examples.py +237 -0
  183. warp/tests/test_fabricarray.py +22 -24
  184. warp/tests/test_fast_math.py +15 -11
  185. warp/tests/test_fem.py +1034 -124
  186. warp/tests/test_fp16.py +23 -16
  187. warp/tests/test_func.py +187 -86
  188. warp/tests/test_generics.py +194 -49
  189. warp/tests/test_grad.py +123 -181
  190. warp/tests/test_grad_customs.py +176 -0
  191. warp/tests/test_hash_grid.py +35 -34
  192. warp/tests/test_import.py +10 -23
  193. warp/tests/test_indexedarray.py +24 -25
  194. warp/tests/test_intersect.py +18 -9
  195. warp/tests/test_large.py +141 -0
  196. warp/tests/test_launch.py +14 -41
  197. warp/tests/test_lerp.py +64 -65
  198. warp/tests/test_lvalue.py +493 -0
  199. warp/tests/test_marching_cubes.py +12 -13
  200. warp/tests/test_mat.py +517 -2898
  201. warp/tests/test_mat_lite.py +115 -0
  202. warp/tests/test_mat_scalar_ops.py +2889 -0
  203. warp/tests/test_math.py +103 -9
  204. warp/tests/test_matmul.py +304 -69
  205. warp/tests/test_matmul_lite.py +410 -0
  206. warp/tests/test_mesh.py +60 -22
  207. warp/tests/test_mesh_query_aabb.py +21 -25
  208. warp/tests/test_mesh_query_point.py +111 -22
  209. warp/tests/test_mesh_query_ray.py +12 -24
  210. warp/tests/test_mlp.py +30 -22
  211. warp/tests/test_model.py +92 -89
  212. warp/tests/test_modules_lite.py +39 -0
  213. warp/tests/test_multigpu.py +88 -114
  214. warp/tests/test_noise.py +12 -11
  215. warp/tests/test_operators.py +16 -20
  216. warp/tests/test_options.py +11 -11
  217. warp/tests/test_pinned.py +17 -18
  218. warp/tests/test_print.py +32 -11
  219. warp/tests/test_quat.py +275 -129
  220. warp/tests/test_rand.py +18 -16
  221. warp/tests/test_reload.py +38 -34
  222. warp/tests/test_rounding.py +50 -43
  223. warp/tests/test_runlength_encode.py +168 -20
  224. warp/tests/test_smoothstep.py +9 -11
  225. warp/tests/test_snippet.py +143 -0
  226. warp/tests/test_sparse.py +261 -63
  227. warp/tests/test_spatial.py +276 -243
  228. warp/tests/test_streams.py +110 -85
  229. warp/tests/test_struct.py +268 -63
  230. warp/tests/test_tape.py +39 -21
  231. warp/tests/test_torch.py +90 -86
  232. warp/tests/test_transient_module.py +10 -12
  233. warp/tests/test_types.py +363 -0
  234. warp/tests/test_utils.py +451 -0
  235. warp/tests/test_vec.py +354 -2050
  236. warp/tests/test_vec_lite.py +73 -0
  237. warp/tests/test_vec_scalar_ops.py +2099 -0
  238. warp/tests/test_volume.py +418 -376
  239. warp/tests/test_volume_write.py +124 -134
  240. warp/tests/unittest_serial.py +35 -0
  241. warp/tests/unittest_suites.py +291 -0
  242. warp/tests/unittest_utils.py +342 -0
  243. warp/tests/{test_misc.py → unused_test_misc.py} +13 -5
  244. warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
  245. warp/thirdparty/appdirs.py +36 -45
  246. warp/thirdparty/unittest_parallel.py +589 -0
  247. warp/types.py +622 -211
  248. warp/utils.py +54 -393
  249. warp_lang-1.0.0b6.dist-info/METADATA +238 -0
  250. warp_lang-1.0.0b6.dist-info/RECORD +409 -0
  251. {warp_lang-1.0.0b2.dist-info → warp_lang-1.0.0b6.dist-info}/WHEEL +1 -1
  252. examples/example_cache_management.py +0 -40
  253. examples/example_multigpu.py +0 -54
  254. examples/example_struct.py +0 -65
  255. examples/fem/example_stokes_transfer_3d.py +0 -210
  256. warp/fem/field/discrete_field.py +0 -80
  257. warp/fem/space/nodal_function_space.py +0 -233
  258. warp/tests/test_all.py +0 -223
  259. warp/tests/test_array_scan.py +0 -60
  260. warp/tests/test_base.py +0 -208
  261. warp/tests/test_unresolved_func.py +0 -7
  262. warp/tests/test_unresolved_symbol.py +0 -7
  263. warp_lang-1.0.0b2.dist-info/METADATA +0 -26
  264. warp_lang-1.0.0b2.dist-info/RECORD +0 -378
  265. /warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
  266. /warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
  267. /warp/tests/{test_square.py → aux_test_square.py} +0 -0
  268. {warp_lang-1.0.0b2.dist-info → warp_lang-1.0.0b6.dist-info}/LICENSE.md +0 -0
  269. {warp_lang-1.0.0b2.dist-info → warp_lang-1.0.0b6.dist-info}/top_level.txt +0 -0
warp/native/quat.h CHANGED
@@ -19,6 +19,15 @@ struct quat_t
19
19
  // zero constructor for adjoint variable initialization
20
20
  inline CUDA_CALLABLE quat_t(Type x=Type(0), Type y=Type(0), Type z=Type(0), Type w=Type(0)) : x(x), y(y), z(z), w(w) {}
21
21
  explicit inline CUDA_CALLABLE quat_t(const vec_t<3,Type>& v, Type w=Type(0)) : x(v[0]), y(v[1]), z(v[2]), w(w) {}
22
+
23
+ template<typename OtherType>
24
+ explicit inline CUDA_CALLABLE quat_t(const quat_t<OtherType>& other)
25
+ {
26
+ x = static_cast<Type>(other.x);
27
+ y = static_cast<Type>(other.y);
28
+ z = static_cast<Type>(other.z);
29
+ w = static_cast<Type>(other.w);
30
+ }
22
31
 
23
32
  // imaginary part
24
33
  Type x;
@@ -73,7 +82,17 @@ inline CUDA_CALLABLE void adj_quat_t(const vec_t<3,Type>& v, Type w, vec_t<3,Typ
73
82
  adj_v[0] += adj_ret.x;
74
83
  adj_v[1] += adj_ret.y;
75
84
  adj_v[2] += adj_ret.z;
76
- adj_w += adj_ret.w;
85
+ adj_w += adj_ret.w;
86
+ }
87
+
88
+ // casting constructor adjoint
89
+ template<typename Type, typename OtherType>
90
+ inline CUDA_CALLABLE void adj_quat_t(const quat_t<OtherType>& other, quat_t<OtherType>& adj_other, const quat_t<Type>& adj_ret)
91
+ {
92
+ adj_other.x += static_cast<OtherType>(adj_ret.x);
93
+ adj_other.y += static_cast<OtherType>(adj_ret.y);
94
+ adj_other.z += static_cast<OtherType>(adj_ret.z);
95
+ adj_other.w += static_cast<OtherType>(adj_ret.w);
77
96
  }
78
97
 
79
98
  // forward methods
@@ -206,12 +225,24 @@ inline CUDA_CALLABLE quat_t<Type> div(quat_t<Type> q, Type s)
206
225
  return quat_t<Type>(q.x/s, q.y/s, q.z/s, q.w/s);
207
226
  }
208
227
 
228
+ template<typename Type>
229
+ inline CUDA_CALLABLE quat_t<Type> div(Type s, quat_t<Type> q)
230
+ {
231
+ return quat_t<Type>(s/q.x, s/q.y, s/q.z, s/q.w);
232
+ }
233
+
209
234
  template<typename Type>
210
235
  inline CUDA_CALLABLE quat_t<Type> operator / (quat_t<Type> a, Type s)
211
236
  {
212
237
  return div(a,s);
213
238
  }
214
239
 
240
+ template<typename Type>
241
+ inline CUDA_CALLABLE quat_t<Type> operator / (Type s, quat_t<Type> a)
242
+ {
243
+ return div(s,a);
244
+ }
245
+
215
246
  template<typename Type>
216
247
  inline CUDA_CALLABLE quat_t<Type> operator*(Type s, const quat_t<Type>& a)
217
248
  {
@@ -321,7 +352,7 @@ inline CUDA_CALLABLE quat_t<Type> quat_from_matrix(const mat_t<3,3,Type>& m)
321
352
  }
322
353
 
323
354
  template<typename Type>
324
- inline CUDA_CALLABLE Type index(const quat_t<Type>& a, int idx)
355
+ inline CUDA_CALLABLE Type extract(const quat_t<Type>& a, int idx)
325
356
  {
326
357
  #if FP_CHECK
327
358
  if (idx < 0 || idx > 3)
@@ -357,7 +388,7 @@ CUDA_CALLABLE inline void adj_lerp(const quat_t<Type>& a, const quat_t<Type>& b,
357
388
  }
358
389
 
359
390
  template<typename Type>
360
- inline CUDA_CALLABLE void adj_index(const quat_t<Type>& a, int idx, quat_t<Type>& adj_a, int & adj_idx, Type & adj_ret)
391
+ inline CUDA_CALLABLE void adj_extract(const quat_t<Type>& a, int idx, quat_t<Type>& adj_a, int & adj_idx, Type & adj_ret)
361
392
  {
362
393
  #if FP_CHECK
363
394
  if (idx < 0 || idx > 3)
@@ -367,7 +398,7 @@ inline CUDA_CALLABLE void adj_index(const quat_t<Type>& a, int idx, quat_t<Type>
367
398
  }
368
399
  #endif
369
400
 
370
- // See wp::index(const quat_t<Type>& a, int idx) note
401
+ // See wp::extract(const quat_t<Type>& a, int idx) note
371
402
  if (idx == 0) {adj_a.x += adj_ret;}
372
403
  else if (idx == 1) {adj_a.y += adj_ret;}
373
404
  else if (idx == 2) {adj_a.z += adj_ret;}
@@ -504,9 +535,14 @@ inline CUDA_CALLABLE void tensordot(const quat_t<Type>& a, const quat_t<Type>& b
504
535
  }
505
536
 
506
537
  template<typename Type>
507
- inline CUDA_CALLABLE void adj_length(const quat_t<Type>& a, quat_t<Type>& adj_a, const Type adj_ret)
538
+ inline CUDA_CALLABLE void adj_length(const quat_t<Type>& a, Type ret, quat_t<Type>& adj_a, const Type adj_ret)
508
539
  {
509
- adj_a += normalize(a)*adj_ret;
540
+ if (ret > Type(kEps))
541
+ {
542
+ Type inv_l = Type(1)/ret;
543
+
544
+ adj_a += quat_t<Type>(a.x*inv_l, a.y*inv_l, a.z*inv_l, a.w*inv_l) * adj_ret;
545
+ }
510
546
  }
511
547
 
512
548
  template<typename Type>
@@ -589,6 +625,13 @@ inline CUDA_CALLABLE void adj_div(quat_t<Type> a, Type s, quat_t<Type>& adj_a, T
589
625
  adj_a += adj_ret / s;
590
626
  }
591
627
 
628
+ template<typename Type>
629
+ inline CUDA_CALLABLE void adj_div(Type s, quat_t<Type> a, Type& adj_s, quat_t<Type>& adj_a, const quat_t<Type>& adj_ret)
630
+ {
631
+ adj_s -= dot(a, adj_ret)/ (s * s); // - a / s^2
632
+ adj_a += s / adj_ret;
633
+ }
634
+
592
635
  template<typename Type>
593
636
  inline CUDA_CALLABLE void adj_quat_rotate(const quat_t<Type>& q, const vec_t<3,Type>& p, quat_t<Type>& adj_q, vec_t<3,Type>& adj_p, const vec_t<3,Type>& adj_ret)
594
637
  {
@@ -658,7 +701,7 @@ inline CUDA_CALLABLE void adj_quat_rotate_inv(const quat_t<Type>& q, const vec_t
658
701
  }
659
702
 
660
703
  template<typename Type>
661
- inline CUDA_CALLABLE void adj_quat_slerp(const quat_t<Type>& q0, const quat_t<Type>& q1, Type t, quat_t<Type>& adj_q0, quat_t<Type>& adj_q1, Type& adj_t, const quat_t<Type>& adj_ret)
704
+ inline CUDA_CALLABLE void adj_quat_slerp(const quat_t<Type>& q0, const quat_t<Type>& q1, Type t, quat_t<Type>& ret, quat_t<Type>& adj_q0, quat_t<Type>& adj_q1, Type& adj_t, const quat_t<Type>& adj_ret)
662
705
  {
663
706
  vec_t<3,Type> axis;
664
707
  Type angle;
@@ -669,7 +712,7 @@ inline CUDA_CALLABLE void adj_quat_slerp(const quat_t<Type>& q0, const quat_t<Ty
669
712
  angle = angle * 0.5;
670
713
 
671
714
  // adj_t
672
- adj_t += dot(mul(quat_slerp(q0, q1, t), quat_t<Type>(angle*axis[0], angle*axis[1], angle*axis[2], Type(0))), adj_ret);
715
+ adj_t += dot(mul(ret, quat_t<Type>(angle*axis[0], angle*axis[1], angle*axis[2], Type(0))), adj_ret);
673
716
 
674
717
  // adj_q0
675
718
  quat_t<Type> q_inc_x_q0;
warp/native/rand.h CHANGED
@@ -9,8 +9,8 @@
9
9
  # pragma once
10
10
  #include "array.h"
11
11
 
12
- #ifndef M_PI
13
- #define M_PI 3.14159265358979323846f
12
+ #ifndef M_PI_F
13
+ #define M_PI_F 3.14159265358979323846f
14
14
  #endif
15
15
 
16
16
  namespace wp
@@ -33,7 +33,7 @@ inline CUDA_CALLABLE float randf(uint32& state) { state = rand_pcg(state); retur
33
33
  inline CUDA_CALLABLE float randf(uint32& state, float min, float max) { return (max - min) * randf(state) + min; }
34
34
 
35
35
  // Box-Muller method
36
- inline CUDA_CALLABLE float randn(uint32& state) { return sqrt(-2.f * log(randf(state))) * cos(2.f * M_PI * randf(state)); }
36
+ inline CUDA_CALLABLE float randn(uint32& state) { return sqrt(-2.f * log(randf(state))) * cos(2.f * M_PI_F * randf(state)); }
37
37
 
38
38
  inline CUDA_CALLABLE void adj_rand_init(int seed, int& adj_seed, float adj_ret) {}
39
39
  inline CUDA_CALLABLE void adj_rand_init(int seed, int offset, int& adj_seed, int& adj_offset, float adj_ret) {}
@@ -55,14 +55,14 @@ inline CUDA_CALLABLE int sample_cdf(uint32& state, const array_t<float>& cdf)
55
55
  inline CUDA_CALLABLE vec2 sample_triangle(uint32& state)
56
56
  {
57
57
  float r = sqrt(randf(state));
58
- float u = 1.0 - r;
58
+ float u = 1.f - r;
59
59
  float v = randf(state) * r;
60
60
  return vec2(u, v);
61
61
  }
62
62
 
63
63
  inline CUDA_CALLABLE vec2 sample_unit_ring(uint32& state)
64
64
  {
65
- float theta = randf(state, 0.f, 2.f*M_PI);
65
+ float theta = randf(state, 0.f, 2.f*M_PI_F);
66
66
  float x = cos(theta);
67
67
  float y = sin(theta);
68
68
  return vec2(x, y);
@@ -71,7 +71,7 @@ inline CUDA_CALLABLE vec2 sample_unit_ring(uint32& state)
71
71
  inline CUDA_CALLABLE vec2 sample_unit_disk(uint32& state)
72
72
  {
73
73
  float r = sqrt(randf(state));
74
- float theta = randf(state, 0.f, 2.f*M_PI);
74
+ float theta = randf(state, 0.f, 2.f*M_PI_F);
75
75
  float x = r * cos(theta);
76
76
  float y = r * sin(theta);
77
77
  return vec2(x, y);
@@ -80,7 +80,7 @@ inline CUDA_CALLABLE vec2 sample_unit_disk(uint32& state)
80
80
  inline CUDA_CALLABLE vec3 sample_unit_sphere_surface(uint32& state)
81
81
  {
82
82
  float phi = acos(1.f - 2.f * randf(state));
83
- float theta = randf(state, 0.f, 2.f*M_PI);
83
+ float theta = randf(state, 0.f, 2.f*M_PI_F);
84
84
  float x = cos(theta) * sin(phi);
85
85
  float y = sin(theta) * sin(phi);
86
86
  float z = cos(phi);
@@ -90,7 +90,7 @@ inline CUDA_CALLABLE vec3 sample_unit_sphere_surface(uint32& state)
90
90
  inline CUDA_CALLABLE vec3 sample_unit_sphere(uint32& state)
91
91
  {
92
92
  float phi = acos(1.f - 2.f * randf(state));
93
- float theta = randf(state, 0.f, 2.f*M_PI);
93
+ float theta = randf(state, 0.f, 2.f*M_PI_F);
94
94
  float r = pow(randf(state), 1.f/3.f);
95
95
  float x = r * cos(theta) * sin(phi);
96
96
  float y = r * sin(theta) * sin(phi);
@@ -101,7 +101,7 @@ inline CUDA_CALLABLE vec3 sample_unit_sphere(uint32& state)
101
101
  inline CUDA_CALLABLE vec3 sample_unit_hemisphere_surface(uint32& state)
102
102
  {
103
103
  float phi = acos(1.f - randf(state));
104
- float theta = randf(state, 0.f, 2.f*M_PI);
104
+ float theta = randf(state, 0.f, 2.f*M_PI_F);
105
105
  float x = cos(theta) * sin(phi);
106
106
  float y = sin(theta) * sin(phi);
107
107
  float z = cos(phi);
@@ -111,7 +111,7 @@ inline CUDA_CALLABLE vec3 sample_unit_hemisphere_surface(uint32& state)
111
111
  inline CUDA_CALLABLE vec3 sample_unit_hemisphere(uint32& state)
112
112
  {
113
113
  float phi = acos(1.f - randf(state));
114
- float theta = randf(state, 0.f, 2.f*M_PI);
114
+ float theta = randf(state, 0.f, 2.f*M_PI_F);
115
115
  float r = pow(randf(state), 1.f/3.f);
116
116
  float x = r * cos(theta) * sin(phi);
117
117
  float y = r * sin(theta) * sin(phi);
@@ -134,6 +134,15 @@ inline CUDA_CALLABLE vec3 sample_unit_cube(uint32& state)
134
134
  return vec3(x, y, z);
135
135
  }
136
136
 
137
+ inline CUDA_CALLABLE vec4 sample_unit_hypercube(uint32& state)
138
+ {
139
+ float a = randf(state) - 0.5f;
140
+ float b = randf(state) - 0.5f;
141
+ float c = randf(state) - 0.5f;
142
+ float d = randf(state) - 0.5f;
143
+ return vec4(a, b, c, d);
144
+ }
145
+
137
146
  inline CUDA_CALLABLE void adj_sample_cdf(uint32& state, const array_t<float>& cdf, uint32& adj_state, array_t<float>& adj_cdf, const int& adj_ret) {}
138
147
  inline CUDA_CALLABLE void adj_sample_triangle(uint32& state, uint32& adj_state, const vec2& adj_ret) {}
139
148
  inline CUDA_CALLABLE void adj_sample_unit_ring(uint32& state, uint32& adj_state, const vec2& adj_ret) {}
@@ -144,6 +153,7 @@ inline CUDA_CALLABLE void adj_sample_unit_hemisphere_surface(uint32& state, uint
144
153
  inline CUDA_CALLABLE void adj_sample_unit_hemisphere(uint32& state, uint32& adj_state, const vec3& adj_ret) {}
145
154
  inline CUDA_CALLABLE void adj_sample_unit_square(uint32& state, uint32& adj_state, const vec2& adj_ret) {}
146
155
  inline CUDA_CALLABLE void adj_sample_unit_cube(uint32& state, uint32& adj_state, const vec3& adj_ret) {}
156
+ inline CUDA_CALLABLE void adj_sample_unit_hypercube(uint32& state, uint32& adj_state, const vec3& adj_ret) {}
147
157
 
148
158
  /*
149
159
  * log-gamma function to support some of these distributions. The
@@ -158,17 +168,17 @@ inline CUDA_CALLABLE float random_loggam(float x)
158
168
  float x0, x2, lg2pi, gl, gl0;
159
169
  uint32 n;
160
170
 
161
- const float a[10] = {8.333333333333333e-02, -2.777777777777778e-03,
162
- 7.936507936507937e-04, -5.952380952380952e-04,
163
- 8.417508417508418e-04, -1.917526917526918e-03,
164
- 6.410256410256410e-03, -2.955065359477124e-02,
165
- 1.796443723688307e-01, -1.39243221690590e+00};
171
+ const float a[10] = {8.333333333333333e-02f, -2.777777777777778e-03f,
172
+ 7.936507936507937e-04f, -5.952380952380952e-04f,
173
+ 8.417508417508418e-04f, -1.917526917526918e-03f,
174
+ 6.410256410256410e-03f, -2.955065359477124e-02f,
175
+ 1.796443723688307e-01f, -1.39243221690590e+00f};
166
176
 
167
- if ((x == 1.0) || (x == 2.0))
177
+ if ((x == 1.f) || (x == 2.f))
168
178
  {
169
- return 0.0;
179
+ return 0.f;
170
180
  }
171
- else if (x < 7.0)
181
+ else if (x < 7.f)
172
182
  {
173
183
  n = uint32((7 - x));
174
184
  }
@@ -178,8 +188,8 @@ inline CUDA_CALLABLE float random_loggam(float x)
178
188
  }
179
189
 
180
190
  x0 = x + float(n);
181
- x2 = (1.0 / x0) * (1.0 / x0);
182
- // log(2 * M_PI)
191
+ x2 = (1.f / x0) * (1.f / x0);
192
+ // log(2 * M_PI_F)
183
193
  lg2pi = 1.8378770664093453f;
184
194
  gl0 = a[9];
185
195
  for (int i = 8; i >= 0; i--)
@@ -187,13 +197,13 @@ inline CUDA_CALLABLE float random_loggam(float x)
187
197
  gl0 *= x2;
188
198
  gl0 += a[i];
189
199
  }
190
- gl = gl0 / x0 + 0.5 * lg2pi + (x0 - 0.5) * log(x0) - x0;
191
- if (x < 7.0)
200
+ gl = gl0 / x0 + 0.5f * lg2pi + (x0 - 0.5f) * log(x0) - x0;
201
+ if (x < 7.f)
192
202
  {
193
203
  for (uint32 k = 1; k <= n; k++)
194
204
  {
195
- gl -= log(x0 - 1.0);
196
- x0 -= 1.0;
205
+ gl -= log(x0 - 1.f);
206
+ x0 -= 1.f;
197
207
  }
198
208
  }
199
209
  return gl;
@@ -205,7 +215,7 @@ inline CUDA_CALLABLE uint32 random_poisson_mult(uint32& state, float lam) {
205
215
 
206
216
  enlam = exp(-lam);
207
217
  X = 0;
208
- prod = 1.0;
218
+ prod = 1.f;
209
219
 
210
220
  while (1)
211
221
  {
@@ -234,22 +244,22 @@ inline CUDA_CALLABLE uint32 random_poisson(uint32& state, float lam)
234
244
 
235
245
  slam = sqrt(lam);
236
246
  loglam = log(lam);
237
- b = 0.931 + 2.53 * slam;
238
- a = -0.059 + 0.02483 * b;
239
- invalpha = 1.1239 + 1.1328 / (b - 3.4);
240
- vr = 0.9277 - 3.6224 / (b - 2.0);
247
+ b = 0.931f + 2.53f * slam;
248
+ a = -0.059f + 0.02483f * b;
249
+ invalpha = 1.1239f + 1.1328f / (b - 3.4f);
250
+ vr = 0.9277f - 3.6224f / (b - 2.f);
241
251
 
242
252
  while (1)
243
253
  {
244
- U = randf(state) - 0.5;
254
+ U = randf(state) - 0.5f;
245
255
  V = randf(state);
246
- us = 0.5 - abs(U);
247
- k = uint32(floor((2 * a / us + b) * U + lam + 0.43));
248
- if ((us >= 0.07) && (V <= vr))
256
+ us = 0.5f - abs(U);
257
+ k = uint32(floor((2.f * a / us + b) * U + lam + 0.43f));
258
+ if ((us >= 0.07f) && (V <= vr))
249
259
  {
250
260
  return k;
251
261
  }
252
- if ((us < 0.013) && (V > us))
262
+ if ((us < 0.013f) && (V > us))
253
263
  {
254
264
  continue;
255
265
  }
warp/native/reduce.cpp CHANGED
@@ -97,7 +97,7 @@ template <typename T> void array_sum_host(const T *ptr_a, T *ptr_out, int count,
97
97
  accumulate_func = dyn_len_sum<T>;
98
98
  }
99
99
 
100
- *ptr_out = 0.0f;
100
+ memset(ptr_out, 0, sizeof(T)*type_length);
101
101
  for (int i = 0; i < count; ++i)
102
102
  accumulate_func(ptr_a + i * stride, ptr_out, type_length);
103
103
  }
warp/native/sparse.cpp CHANGED
@@ -179,10 +179,10 @@ void bsr_transpose_host(int rows_per_block, int cols_per_block, int row_count, i
179
179
  const int block_size = rows_per_block * cols_per_block;
180
180
 
181
181
  void (*block_transpose_func)(const T *, T *, int, int) = bsr_dyn_block_transpose<T>;
182
- switch (row_count)
182
+ switch (rows_per_block)
183
183
  {
184
184
  case 1:
185
- switch (col_count)
185
+ switch (cols_per_block)
186
186
  {
187
187
  case 1:
188
188
  block_transpose_func = bsr_fixed_block_transpose<1, 1, T>;
@@ -196,7 +196,7 @@ void bsr_transpose_host(int rows_per_block, int cols_per_block, int row_count, i
196
196
  }
197
197
  break;
198
198
  case 2:
199
- switch (col_count)
199
+ switch (cols_per_block)
200
200
  {
201
201
  case 1:
202
202
  block_transpose_func = bsr_fixed_block_transpose<2, 1, T>;
@@ -210,7 +210,7 @@ void bsr_transpose_host(int rows_per_block, int cols_per_block, int row_count, i
210
210
  }
211
211
  break;
212
212
  case 3:
213
- switch (col_count)
213
+ switch (cols_per_block)
214
214
  {
215
215
  case 1:
216
216
  block_transpose_func = bsr_fixed_block_transpose<3, 1, T>;