warp-lang 1.4.0__py3-none-macosx_10_13_universal2.whl → 1.4.2__py3-none-macosx_10_13_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/bin/libwarp-clang.dylib +0 -0
- warp/bin/libwarp.dylib +0 -0
- warp/builtins.py +68 -62
- warp/codegen.py +17 -13
- warp/config.py +1 -1
- warp/context.py +26 -23
- warp/examples/core/example_dem.py +2 -1
- warp/examples/core/example_mesh_intersect.py +3 -3
- warp/examples/fem/utils.py +2 -1
- warp/examples/optim/example_walker.py +2 -2
- warp/examples/sim/example_jacobian_ik.py +6 -2
- warp/fem/utils.py +9 -4
- warp/native/array.h +40 -40
- warp/native/builtin.h +58 -17
- warp/native/bvh.cu +2 -2
- warp/native/exports.h +17 -0
- warp/native/mesh.cu +2 -2
- warp/native/range.h +11 -2
- warp/sim/integrator_xpbd.py +2 -6
- warp/sim/model.py +4 -5
- warp/sparse.py +9 -7
- warp/stubs.py +82 -81
- warp/tests/test_array.py +168 -48
- warp/tests/test_closest_point_edge_edge.py +8 -8
- warp/tests/test_codegen.py +70 -0
- warp/tests/test_fabricarray.py +33 -0
- warp/tests/test_fem.py +17 -1
- warp/tests/test_func.py +35 -1
- warp/tests/test_mesh_query_point.py +4 -3
- warp/tests/test_model.py +13 -0
- warp/tests/test_print.py +135 -0
- warp/tests/test_static.py +157 -1
- warp/tests/unittest_suites.py +4 -0
- warp/types.py +18 -9
- {warp_lang-1.4.0.dist-info → warp_lang-1.4.2.dist-info}/METADATA +4 -4
- {warp_lang-1.4.0.dist-info → warp_lang-1.4.2.dist-info}/RECORD +39 -39
- {warp_lang-1.4.0.dist-info → warp_lang-1.4.2.dist-info}/WHEEL +1 -1
- {warp_lang-1.4.0.dist-info → warp_lang-1.4.2.dist-info}/LICENSE.md +0 -0
- {warp_lang-1.4.0.dist-info → warp_lang-1.4.2.dist-info}/top_level.txt +0 -0
warp/examples/fem/utils.py
CHANGED
|
@@ -143,7 +143,7 @@ def gen_hexmesh(res, bounds_lo: Optional[wp.vec3] = None, bounds_hi: Optional[wp
|
|
|
143
143
|
|
|
144
144
|
x = np.linspace(bounds_lo[0], bounds_hi[0], Nx + 1)
|
|
145
145
|
y = np.linspace(bounds_lo[1], bounds_hi[1], Ny + 1)
|
|
146
|
-
z = np.linspace(bounds_lo[
|
|
146
|
+
z = np.linspace(bounds_lo[2], bounds_hi[2], Nz + 1)
|
|
147
147
|
|
|
148
148
|
positions = np.transpose(np.meshgrid(x, y, z, indexing="ij"), axes=(1, 2, 3, 0)).reshape(-1, 3)
|
|
149
149
|
|
|
@@ -252,6 +252,7 @@ def bsr_cg(
|
|
|
252
252
|
check_every=check_every,
|
|
253
253
|
M=M,
|
|
254
254
|
callback=callback,
|
|
255
|
+
use_cuda_graph=not wp.config.verify_cuda,
|
|
255
256
|
)
|
|
256
257
|
|
|
257
258
|
if not quiet:
|
|
@@ -21,7 +21,7 @@ import math
|
|
|
21
21
|
import os
|
|
22
22
|
|
|
23
23
|
import numpy as np
|
|
24
|
-
from pxr import Usd, UsdGeom
|
|
24
|
+
from pxr import Gf, Usd, UsdGeom
|
|
25
25
|
|
|
26
26
|
import warp as wp
|
|
27
27
|
import warp.examples
|
|
@@ -93,7 +93,7 @@ class Example:
|
|
|
93
93
|
geom = UsdGeom.Mesh(asset_stage.GetPrimAtPath("/root/bear"))
|
|
94
94
|
points = geom.GetPointsAttr().Get()
|
|
95
95
|
|
|
96
|
-
xform = geom.ComputeLocalToWorldTransform(0.0)
|
|
96
|
+
xform = Gf.Matrix4f(geom.ComputeLocalToWorldTransform(0.0))
|
|
97
97
|
for i in range(len(points)):
|
|
98
98
|
points[i] = xform.Transform(points[i])
|
|
99
99
|
|
|
@@ -41,6 +41,8 @@ def compute_endeffector_position(
|
|
|
41
41
|
|
|
42
42
|
class Example:
|
|
43
43
|
def __init__(self, stage_path="example_jacobian_ik.usd", num_envs=10):
|
|
44
|
+
rng = np.random.default_rng(42)
|
|
45
|
+
|
|
44
46
|
builder = wp.sim.ModelBuilder()
|
|
45
47
|
|
|
46
48
|
self.num_envs = num_envs
|
|
@@ -81,7 +83,7 @@ class Example:
|
|
|
81
83
|
)
|
|
82
84
|
self.target_origin.append((i * 2.0, 4.0, 0.0))
|
|
83
85
|
# joint initial positions
|
|
84
|
-
builder.joint_q[-3:] =
|
|
86
|
+
builder.joint_q[-3:] = rng.uniform(-0.5, 0.5, size=3)
|
|
85
87
|
self.target_origin = np.array(self.target_origin)
|
|
86
88
|
|
|
87
89
|
# finalize model
|
|
@@ -207,6 +209,8 @@ if __name__ == "__main__":
|
|
|
207
209
|
|
|
208
210
|
args = parser.parse_known_args()[0]
|
|
209
211
|
|
|
212
|
+
rng = np.random.default_rng(42)
|
|
213
|
+
|
|
210
214
|
with wp.ScopedDevice(args.device):
|
|
211
215
|
example = Example(stage_path=args.stage_path, num_envs=args.num_envs)
|
|
212
216
|
|
|
@@ -218,7 +222,7 @@ if __name__ == "__main__":
|
|
|
218
222
|
for _ in range(args.num_rollouts):
|
|
219
223
|
# select new random target points for all envs
|
|
220
224
|
example.targets = example.target_origin.copy()
|
|
221
|
-
example.targets[:, 1:] +=
|
|
225
|
+
example.targets[:, 1:] += rng.uniform(-0.5, 0.5, size=(example.num_envs, 2))
|
|
222
226
|
|
|
223
227
|
for iter in range(args.train_iters):
|
|
224
228
|
example.step()
|
warp/fem/utils.py
CHANGED
|
@@ -193,9 +193,14 @@ def _givens_rotation(a: Any, b: Any):
|
|
|
193
193
|
# Givens rotation [[c -s], [s c]] such that sa+cb =0
|
|
194
194
|
zero = type(a)(0.0)
|
|
195
195
|
one = type(a)(1.0)
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
196
|
+
|
|
197
|
+
b2 = b * b
|
|
198
|
+
if b2 == zero:
|
|
199
|
+
# id rotation
|
|
200
|
+
return one, zero
|
|
201
|
+
|
|
202
|
+
scale = one / wp.sqrt(a * a + b2)
|
|
203
|
+
return a * scale, -b * scale
|
|
199
204
|
|
|
200
205
|
|
|
201
206
|
@wp.func
|
|
@@ -229,7 +234,7 @@ def tridiagonal_symmetric_eigenvalues_qr(D: Any, L: Any, Q: Any, tol: Any):
|
|
|
229
234
|
x = D.dtype(0.0) # coeff atop buldge
|
|
230
235
|
|
|
231
236
|
for _ in range(32 * m): # failsafe, usually converges faster than that
|
|
232
|
-
# Iterate over all
|
|
237
|
+
# Iterate over all independent (deflated) blocks
|
|
233
238
|
end = int(-1)
|
|
234
239
|
|
|
235
240
|
for k in range(m - 1):
|
warp/native/array.h
CHANGED
|
@@ -811,7 +811,7 @@ CUDA_CALLABLE inline void adj_atomic_add(bool* buf, bool value) { }
|
|
|
811
811
|
|
|
812
812
|
// only generate gradients for T types
|
|
813
813
|
template<typename T>
|
|
814
|
-
inline CUDA_CALLABLE void adj_address(const array_t<T>& buf, int i, const array_t<T>& adj_buf, int
|
|
814
|
+
inline CUDA_CALLABLE void adj_address(const array_t<T>& buf, int i, const array_t<T>& adj_buf, int adj_i, const T& adj_output)
|
|
815
815
|
{
|
|
816
816
|
if (adj_buf.data)
|
|
817
817
|
adj_atomic_add(&index(adj_buf, i), adj_output);
|
|
@@ -819,7 +819,7 @@ inline CUDA_CALLABLE void adj_address(const array_t<T>& buf, int i, const array_
|
|
|
819
819
|
adj_atomic_add(&index_grad(buf, i), adj_output);
|
|
820
820
|
}
|
|
821
821
|
template<typename T>
|
|
822
|
-
inline CUDA_CALLABLE void adj_address(const array_t<T>& buf, int i, int j, const array_t<T>& adj_buf, int
|
|
822
|
+
inline CUDA_CALLABLE void adj_address(const array_t<T>& buf, int i, int j, const array_t<T>& adj_buf, int adj_i, int adj_j, const T& adj_output)
|
|
823
823
|
{
|
|
824
824
|
if (adj_buf.data)
|
|
825
825
|
adj_atomic_add(&index(adj_buf, i, j), adj_output);
|
|
@@ -827,7 +827,7 @@ inline CUDA_CALLABLE void adj_address(const array_t<T>& buf, int i, int j, const
|
|
|
827
827
|
adj_atomic_add(&index_grad(buf, i, j), adj_output);
|
|
828
828
|
}
|
|
829
829
|
template<typename T>
|
|
830
|
-
inline CUDA_CALLABLE void adj_address(const array_t<T>& buf, int i, int j, int k, const array_t<T>& adj_buf, int
|
|
830
|
+
inline CUDA_CALLABLE void adj_address(const array_t<T>& buf, int i, int j, int k, const array_t<T>& adj_buf, int adj_i, int adj_j, int adj_k, const T& adj_output)
|
|
831
831
|
{
|
|
832
832
|
if (adj_buf.data)
|
|
833
833
|
adj_atomic_add(&index(adj_buf, i, j, k), adj_output);
|
|
@@ -835,7 +835,7 @@ inline CUDA_CALLABLE void adj_address(const array_t<T>& buf, int i, int j, int k
|
|
|
835
835
|
adj_atomic_add(&index_grad(buf, i, j, k), adj_output);
|
|
836
836
|
}
|
|
837
837
|
template<typename T>
|
|
838
|
-
inline CUDA_CALLABLE void adj_address(const array_t<T>& buf, int i, int j, int k, int l, const array_t<T>& adj_buf, int
|
|
838
|
+
inline CUDA_CALLABLE void adj_address(const array_t<T>& buf, int i, int j, int k, int l, const array_t<T>& adj_buf, int adj_i, int adj_j, int adj_k, int adj_l, const T& adj_output)
|
|
839
839
|
{
|
|
840
840
|
if (adj_buf.data)
|
|
841
841
|
adj_atomic_add(&index(adj_buf, i, j, k, l), adj_output);
|
|
@@ -844,7 +844,7 @@ inline CUDA_CALLABLE void adj_address(const array_t<T>& buf, int i, int j, int k
|
|
|
844
844
|
}
|
|
845
845
|
|
|
846
846
|
template<typename T>
|
|
847
|
-
inline CUDA_CALLABLE void adj_array_store(const array_t<T>& buf, int i, T value, const array_t<T>& adj_buf, int
|
|
847
|
+
inline CUDA_CALLABLE void adj_array_store(const array_t<T>& buf, int i, T value, const array_t<T>& adj_buf, int adj_i, T& adj_value)
|
|
848
848
|
{
|
|
849
849
|
if (adj_buf.data)
|
|
850
850
|
adj_value += index(adj_buf, i);
|
|
@@ -854,7 +854,7 @@ inline CUDA_CALLABLE void adj_array_store(const array_t<T>& buf, int i, T value,
|
|
|
854
854
|
FP_VERIFY_ADJ_1(value, adj_value)
|
|
855
855
|
}
|
|
856
856
|
template<typename T>
|
|
857
|
-
inline CUDA_CALLABLE void adj_array_store(const array_t<T>& buf, int i, int j, T value, const array_t<T>& adj_buf, int
|
|
857
|
+
inline CUDA_CALLABLE void adj_array_store(const array_t<T>& buf, int i, int j, T value, const array_t<T>& adj_buf, int adj_i, int adj_j, T& adj_value)
|
|
858
858
|
{
|
|
859
859
|
if (adj_buf.data)
|
|
860
860
|
adj_value += index(adj_buf, i, j);
|
|
@@ -864,7 +864,7 @@ inline CUDA_CALLABLE void adj_array_store(const array_t<T>& buf, int i, int j, T
|
|
|
864
864
|
FP_VERIFY_ADJ_2(value, adj_value)
|
|
865
865
|
}
|
|
866
866
|
template<typename T>
|
|
867
|
-
inline CUDA_CALLABLE void adj_array_store(const array_t<T>& buf, int i, int j, int k, T value, const array_t<T>& adj_buf, int
|
|
867
|
+
inline CUDA_CALLABLE void adj_array_store(const array_t<T>& buf, int i, int j, int k, T value, const array_t<T>& adj_buf, int adj_i, int adj_j, int adj_k, T& adj_value)
|
|
868
868
|
{
|
|
869
869
|
if (adj_buf.data)
|
|
870
870
|
adj_value += index(adj_buf, i, j, k);
|
|
@@ -874,7 +874,7 @@ inline CUDA_CALLABLE void adj_array_store(const array_t<T>& buf, int i, int j, i
|
|
|
874
874
|
FP_VERIFY_ADJ_3(value, adj_value)
|
|
875
875
|
}
|
|
876
876
|
template<typename T>
|
|
877
|
-
inline CUDA_CALLABLE void adj_array_store(const array_t<T>& buf, int i, int j, int k, int l, T value, const array_t<T>& adj_buf, int
|
|
877
|
+
inline CUDA_CALLABLE void adj_array_store(const array_t<T>& buf, int i, int j, int k, int l, T value, const array_t<T>& adj_buf, int adj_i, int adj_j, int adj_k, int adj_l, T& adj_value)
|
|
878
878
|
{
|
|
879
879
|
if (adj_buf.data)
|
|
880
880
|
adj_value += index(adj_buf, i, j, k, l);
|
|
@@ -898,7 +898,7 @@ inline CUDA_CALLABLE void adj_load(const T* address, const T& adj_address, T& ad
|
|
|
898
898
|
}
|
|
899
899
|
|
|
900
900
|
template<typename T>
|
|
901
|
-
inline CUDA_CALLABLE void adj_atomic_add(const array_t<T>& buf, int i, T value, const array_t<T>& adj_buf, int
|
|
901
|
+
inline CUDA_CALLABLE void adj_atomic_add(const array_t<T>& buf, int i, T value, const array_t<T>& adj_buf, int adj_i, T& adj_value, const T& adj_ret)
|
|
902
902
|
{
|
|
903
903
|
if (adj_buf.data)
|
|
904
904
|
adj_value += index(adj_buf, i);
|
|
@@ -908,7 +908,7 @@ inline CUDA_CALLABLE void adj_atomic_add(const array_t<T>& buf, int i, T value,
|
|
|
908
908
|
FP_VERIFY_ADJ_1(value, adj_value)
|
|
909
909
|
}
|
|
910
910
|
template<typename T>
|
|
911
|
-
inline CUDA_CALLABLE void adj_atomic_add(const array_t<T>& buf, int i, int j, T value, const array_t<T>& adj_buf, int
|
|
911
|
+
inline CUDA_CALLABLE void adj_atomic_add(const array_t<T>& buf, int i, int j, T value, const array_t<T>& adj_buf, int adj_i, int adj_j, T& adj_value, const T& adj_ret)
|
|
912
912
|
{
|
|
913
913
|
if (adj_buf.data)
|
|
914
914
|
adj_value += index(adj_buf, i, j);
|
|
@@ -918,7 +918,7 @@ inline CUDA_CALLABLE void adj_atomic_add(const array_t<T>& buf, int i, int j, T
|
|
|
918
918
|
FP_VERIFY_ADJ_2(value, adj_value)
|
|
919
919
|
}
|
|
920
920
|
template<typename T>
|
|
921
|
-
inline CUDA_CALLABLE void adj_atomic_add(const array_t<T>& buf, int i, int j, int k, T value, const array_t<T>& adj_buf, int
|
|
921
|
+
inline CUDA_CALLABLE void adj_atomic_add(const array_t<T>& buf, int i, int j, int k, T value, const array_t<T>& adj_buf, int adj_i, int adj_j, int adj_k, T& adj_value, const T& adj_ret)
|
|
922
922
|
{
|
|
923
923
|
if (adj_buf.data)
|
|
924
924
|
adj_value += index(adj_buf, i, j, k);
|
|
@@ -928,7 +928,7 @@ inline CUDA_CALLABLE void adj_atomic_add(const array_t<T>& buf, int i, int j, in
|
|
|
928
928
|
FP_VERIFY_ADJ_3(value, adj_value)
|
|
929
929
|
}
|
|
930
930
|
template<typename T>
|
|
931
|
-
inline CUDA_CALLABLE void adj_atomic_add(const array_t<T>& buf, int i, int j, int k, int l, T value, const array_t<T>& adj_buf, int
|
|
931
|
+
inline CUDA_CALLABLE void adj_atomic_add(const array_t<T>& buf, int i, int j, int k, int l, T value, const array_t<T>& adj_buf, int adj_i, int adj_j, int adj_k, int adj_l, T& adj_value, const T& adj_ret)
|
|
932
932
|
{
|
|
933
933
|
if (adj_buf.data)
|
|
934
934
|
adj_value += index(adj_buf, i, j, k, l);
|
|
@@ -939,7 +939,7 @@ inline CUDA_CALLABLE void adj_atomic_add(const array_t<T>& buf, int i, int j, in
|
|
|
939
939
|
}
|
|
940
940
|
|
|
941
941
|
template<typename T>
|
|
942
|
-
inline CUDA_CALLABLE void adj_atomic_sub(const array_t<T>& buf, int i, T value, const array_t<T>& adj_buf, int
|
|
942
|
+
inline CUDA_CALLABLE void adj_atomic_sub(const array_t<T>& buf, int i, T value, const array_t<T>& adj_buf, int adj_i, T& adj_value, const T& adj_ret)
|
|
943
943
|
{
|
|
944
944
|
if (adj_buf.data)
|
|
945
945
|
adj_value -= index(adj_buf, i);
|
|
@@ -949,7 +949,7 @@ inline CUDA_CALLABLE void adj_atomic_sub(const array_t<T>& buf, int i, T value,
|
|
|
949
949
|
FP_VERIFY_ADJ_1(value, adj_value)
|
|
950
950
|
}
|
|
951
951
|
template<typename T>
|
|
952
|
-
inline CUDA_CALLABLE void adj_atomic_sub(const array_t<T>& buf, int i, int j, T value, const array_t<T>& adj_buf, int
|
|
952
|
+
inline CUDA_CALLABLE void adj_atomic_sub(const array_t<T>& buf, int i, int j, T value, const array_t<T>& adj_buf, int adj_i, int adj_j, T& adj_value, const T& adj_ret)
|
|
953
953
|
{
|
|
954
954
|
if (adj_buf.data)
|
|
955
955
|
adj_value -= index(adj_buf, i, j);
|
|
@@ -959,7 +959,7 @@ inline CUDA_CALLABLE void adj_atomic_sub(const array_t<T>& buf, int i, int j, T
|
|
|
959
959
|
FP_VERIFY_ADJ_2(value, adj_value)
|
|
960
960
|
}
|
|
961
961
|
template<typename T>
|
|
962
|
-
inline CUDA_CALLABLE void adj_atomic_sub(const array_t<T>& buf, int i, int j, int k, T value, const array_t<T>& adj_buf, int
|
|
962
|
+
inline CUDA_CALLABLE void adj_atomic_sub(const array_t<T>& buf, int i, int j, int k, T value, const array_t<T>& adj_buf, int adj_i, int adj_j, int adj_k, T& adj_value, const T& adj_ret)
|
|
963
963
|
{
|
|
964
964
|
if (adj_buf.data)
|
|
965
965
|
adj_value -= index(adj_buf, i, j, k);
|
|
@@ -969,7 +969,7 @@ inline CUDA_CALLABLE void adj_atomic_sub(const array_t<T>& buf, int i, int j, in
|
|
|
969
969
|
FP_VERIFY_ADJ_3(value, adj_value)
|
|
970
970
|
}
|
|
971
971
|
template<typename T>
|
|
972
|
-
inline CUDA_CALLABLE void adj_atomic_sub(const array_t<T>& buf, int i, int j, int k, int l, T value, const array_t<T>& adj_buf, int
|
|
972
|
+
inline CUDA_CALLABLE void adj_atomic_sub(const array_t<T>& buf, int i, int j, int k, int l, T value, const array_t<T>& adj_buf, int adj_i, int adj_j, int adj_k, int adj_l, T& adj_value, const T& adj_ret)
|
|
973
973
|
{
|
|
974
974
|
if (adj_buf.data)
|
|
975
975
|
adj_value -= index(adj_buf, i, j, k, l);
|
|
@@ -981,44 +981,44 @@ inline CUDA_CALLABLE void adj_atomic_sub(const array_t<T>& buf, int i, int j, in
|
|
|
981
981
|
|
|
982
982
|
// generic array types that do not support gradient computation (indexedarray, etc.)
|
|
983
983
|
template<template<typename> class A1, template<typename> class A2, typename T>
|
|
984
|
-
inline CUDA_CALLABLE void adj_address(const A1<T>& buf, int i, const A2<T>& adj_buf, int
|
|
984
|
+
inline CUDA_CALLABLE void adj_address(const A1<T>& buf, int i, const A2<T>& adj_buf, int adj_i, const T& adj_output) {}
|
|
985
985
|
template<template<typename> class A1, template<typename> class A2, typename T>
|
|
986
|
-
inline CUDA_CALLABLE void adj_address(const A1<T>& buf, int i, int j, const A2<T>& adj_buf, int
|
|
986
|
+
inline CUDA_CALLABLE void adj_address(const A1<T>& buf, int i, int j, const A2<T>& adj_buf, int adj_i, int adj_j, const T& adj_output) {}
|
|
987
987
|
template<template<typename> class A1, template<typename> class A2, typename T>
|
|
988
|
-
inline CUDA_CALLABLE void adj_address(const A1<T>& buf, int i, int j, int k, const A2<T>& adj_buf, int
|
|
988
|
+
inline CUDA_CALLABLE void adj_address(const A1<T>& buf, int i, int j, int k, const A2<T>& adj_buf, int adj_i, int adj_j, int adj_k, const T& adj_output) {}
|
|
989
989
|
template<template<typename> class A1, template<typename> class A2, typename T>
|
|
990
|
-
inline CUDA_CALLABLE void adj_address(const A1<T>& buf, int i, int j, int k, int l, const A2<T>& adj_buf, int
|
|
990
|
+
inline CUDA_CALLABLE void adj_address(const A1<T>& buf, int i, int j, int k, int l, const A2<T>& adj_buf, int adj_i, int adj_j, int adj_k, int adj_l, const T& adj_output) {}
|
|
991
991
|
|
|
992
992
|
template<template<typename> class A1, template<typename> class A2, typename T>
|
|
993
|
-
inline CUDA_CALLABLE void adj_array_store(const A1<T>& buf, int i, T value, const A2<T>& adj_buf, int
|
|
993
|
+
inline CUDA_CALLABLE void adj_array_store(const A1<T>& buf, int i, T value, const A2<T>& adj_buf, int adj_i, T& adj_value) {}
|
|
994
994
|
template<template<typename> class A1, template<typename> class A2, typename T>
|
|
995
|
-
inline CUDA_CALLABLE void adj_array_store(const A1<T>& buf, int i, int j, T value, const A2<T>& adj_buf, int
|
|
995
|
+
inline CUDA_CALLABLE void adj_array_store(const A1<T>& buf, int i, int j, T value, const A2<T>& adj_buf, int adj_i, int adj_j, T& adj_value) {}
|
|
996
996
|
template<template<typename> class A1, template<typename> class A2, typename T>
|
|
997
|
-
inline CUDA_CALLABLE void adj_array_store(const A1<T>& buf, int i, int j, int k, T value, const A2<T>& adj_buf, int
|
|
997
|
+
inline CUDA_CALLABLE void adj_array_store(const A1<T>& buf, int i, int j, int k, T value, const A2<T>& adj_buf, int adj_i, int adj_j, int adj_k, T& adj_value) {}
|
|
998
998
|
template<template<typename> class A1, template<typename> class A2, typename T>
|
|
999
|
-
inline CUDA_CALLABLE void adj_array_store(const A1<T>& buf, int i, int j, int k, int l, T value, const A2<T>& adj_buf, int
|
|
999
|
+
inline CUDA_CALLABLE void adj_array_store(const A1<T>& buf, int i, int j, int k, int l, T value, const A2<T>& adj_buf, int adj_i, int adj_j, int adj_k, int adj_l, T& adj_value) {}
|
|
1000
1000
|
|
|
1001
1001
|
template<template<typename> class A1, template<typename> class A2, typename T>
|
|
1002
|
-
inline CUDA_CALLABLE void adj_atomic_add(const A1<T>& buf, int i, T value, const A2<T>& adj_buf, int
|
|
1002
|
+
inline CUDA_CALLABLE void adj_atomic_add(const A1<T>& buf, int i, T value, const A2<T>& adj_buf, int adj_i, T& adj_value, const T& adj_ret) {}
|
|
1003
1003
|
template<template<typename> class A1, template<typename> class A2, typename T>
|
|
1004
|
-
inline CUDA_CALLABLE void adj_atomic_add(const A1<T>& buf, int i, int j, T value, const A2<T>& adj_buf, int
|
|
1004
|
+
inline CUDA_CALLABLE void adj_atomic_add(const A1<T>& buf, int i, int j, T value, const A2<T>& adj_buf, int adj_i, int adj_j, T& adj_value, const T& adj_ret) {}
|
|
1005
1005
|
template<template<typename> class A1, template<typename> class A2, typename T>
|
|
1006
|
-
inline CUDA_CALLABLE void adj_atomic_add(const A1<T>& buf, int i, int j, int k, T value, const A2<T>& adj_buf, int
|
|
1006
|
+
inline CUDA_CALLABLE void adj_atomic_add(const A1<T>& buf, int i, int j, int k, T value, const A2<T>& adj_buf, int adj_i, int adj_j, int adj_k, T& adj_value, const T& adj_ret) {}
|
|
1007
1007
|
template<template<typename> class A1, template<typename> class A2, typename T>
|
|
1008
|
-
inline CUDA_CALLABLE void adj_atomic_add(const A1<T>& buf, int i, int j, int k, int l, T value, const A2<T>& adj_buf, int
|
|
1008
|
+
inline CUDA_CALLABLE void adj_atomic_add(const A1<T>& buf, int i, int j, int k, int l, T value, const A2<T>& adj_buf, int adj_i, int adj_j, int adj_k, int adj_l, T& adj_value, const T& adj_ret) {}
|
|
1009
1009
|
|
|
1010
1010
|
template<template<typename> class A1, template<typename> class A2, typename T>
|
|
1011
|
-
inline CUDA_CALLABLE void adj_atomic_sub(const A1<T>& buf, int i, T value, const A2<T>& adj_buf, int
|
|
1011
|
+
inline CUDA_CALLABLE void adj_atomic_sub(const A1<T>& buf, int i, T value, const A2<T>& adj_buf, int adj_i, T& adj_value, const T& adj_ret) {}
|
|
1012
1012
|
template<template<typename> class A1, template<typename> class A2, typename T>
|
|
1013
|
-
inline CUDA_CALLABLE void adj_atomic_sub(const A1<T>& buf, int i, int j, T value, const A2<T>& adj_buf, int
|
|
1013
|
+
inline CUDA_CALLABLE void adj_atomic_sub(const A1<T>& buf, int i, int j, T value, const A2<T>& adj_buf, int adj_i, int adj_j, T& adj_value, const T& adj_ret) {}
|
|
1014
1014
|
template<template<typename> class A1, template<typename> class A2, typename T>
|
|
1015
|
-
inline CUDA_CALLABLE void adj_atomic_sub(const A1<T>& buf, int i, int j, int k, T value, const A2<T>& adj_buf, int
|
|
1015
|
+
inline CUDA_CALLABLE void adj_atomic_sub(const A1<T>& buf, int i, int j, int k, T value, const A2<T>& adj_buf, int adj_i, int adj_j, int adj_k, T& adj_value, const T& adj_ret) {}
|
|
1016
1016
|
template<template<typename> class A1, template<typename> class A2, typename T>
|
|
1017
|
-
inline CUDA_CALLABLE void adj_atomic_sub(const A1<T>& buf, int i, int j, int k, int l, T value, const A2<T>& adj_buf, int
|
|
1017
|
+
inline CUDA_CALLABLE void adj_atomic_sub(const A1<T>& buf, int i, int j, int k, int l, T value, const A2<T>& adj_buf, int adj_i, int adj_j, int adj_k, int adj_l, T& adj_value, const T& adj_ret) {}
|
|
1018
1018
|
|
|
1019
1019
|
// generic handler for scalar values
|
|
1020
1020
|
template<template<typename> class A1, template<typename> class A2, typename T>
|
|
1021
|
-
inline CUDA_CALLABLE void adj_atomic_min(const A1<T>& buf, int i, T value, const A2<T>& adj_buf, int
|
|
1021
|
+
inline CUDA_CALLABLE void adj_atomic_min(const A1<T>& buf, int i, T value, const A2<T>& adj_buf, int adj_i, T& adj_value, const T& adj_ret) {
|
|
1022
1022
|
if (adj_buf.data)
|
|
1023
1023
|
adj_atomic_minmax(&index(buf, i), &index(adj_buf, i), value, adj_value);
|
|
1024
1024
|
else if (buf.grad)
|
|
@@ -1027,7 +1027,7 @@ inline CUDA_CALLABLE void adj_atomic_min(const A1<T>& buf, int i, T value, const
|
|
|
1027
1027
|
FP_VERIFY_ADJ_1(value, adj_value)
|
|
1028
1028
|
}
|
|
1029
1029
|
template<template<typename> class A1, template<typename> class A2, typename T>
|
|
1030
|
-
inline CUDA_CALLABLE void adj_atomic_min(const A1<T>& buf, int i, int j, T value, const A2<T>& adj_buf, int
|
|
1030
|
+
inline CUDA_CALLABLE void adj_atomic_min(const A1<T>& buf, int i, int j, T value, const A2<T>& adj_buf, int adj_i, int adj_j, T& adj_value, const T& adj_ret) {
|
|
1031
1031
|
if (adj_buf.data)
|
|
1032
1032
|
adj_atomic_minmax(&index(buf, i, j), &index(adj_buf, i, j), value, adj_value);
|
|
1033
1033
|
else if (buf.grad)
|
|
@@ -1036,7 +1036,7 @@ inline CUDA_CALLABLE void adj_atomic_min(const A1<T>& buf, int i, int j, T value
|
|
|
1036
1036
|
FP_VERIFY_ADJ_2(value, adj_value)
|
|
1037
1037
|
}
|
|
1038
1038
|
template<template<typename> class A1, template<typename> class A2, typename T>
|
|
1039
|
-
inline CUDA_CALLABLE void adj_atomic_min(const A1<T>& buf, int i, int j, int k, T value, const A2<T>& adj_buf, int
|
|
1039
|
+
inline CUDA_CALLABLE void adj_atomic_min(const A1<T>& buf, int i, int j, int k, T value, const A2<T>& adj_buf, int adj_i, int adj_j, int adj_k, T& adj_value, const T& adj_ret) {
|
|
1040
1040
|
if (adj_buf.data)
|
|
1041
1041
|
adj_atomic_minmax(&index(buf, i, j, k), &index(adj_buf, i, j, k), value, adj_value);
|
|
1042
1042
|
else if (buf.grad)
|
|
@@ -1045,7 +1045,7 @@ inline CUDA_CALLABLE void adj_atomic_min(const A1<T>& buf, int i, int j, int k,
|
|
|
1045
1045
|
FP_VERIFY_ADJ_3(value, adj_value)
|
|
1046
1046
|
}
|
|
1047
1047
|
template<template<typename> class A1, template<typename> class A2, typename T>
|
|
1048
|
-
inline CUDA_CALLABLE void adj_atomic_min(const A1<T>& buf, int i, int j, int k, int l, T value, const A2<T>& adj_buf, int
|
|
1048
|
+
inline CUDA_CALLABLE void adj_atomic_min(const A1<T>& buf, int i, int j, int k, int l, T value, const A2<T>& adj_buf, int adj_i, int adj_j, int adj_k, int adj_l, T& adj_value, const T& adj_ret) {
|
|
1049
1049
|
if (adj_buf.data)
|
|
1050
1050
|
adj_atomic_minmax(&index(buf, i, j, k, l), &index(adj_buf, i, j, k, l), value, adj_value);
|
|
1051
1051
|
else if (buf.grad)
|
|
@@ -1055,7 +1055,7 @@ inline CUDA_CALLABLE void adj_atomic_min(const A1<T>& buf, int i, int j, int k,
|
|
|
1055
1055
|
}
|
|
1056
1056
|
|
|
1057
1057
|
template<template<typename> class A1, template<typename> class A2, typename T>
|
|
1058
|
-
inline CUDA_CALLABLE void adj_atomic_max(const A1<T>& buf, int i, T value, const A2<T>& adj_buf, int
|
|
1058
|
+
inline CUDA_CALLABLE void adj_atomic_max(const A1<T>& buf, int i, T value, const A2<T>& adj_buf, int adj_i, T& adj_value, const T& adj_ret) {
|
|
1059
1059
|
if (adj_buf.data)
|
|
1060
1060
|
adj_atomic_minmax(&index(buf, i), &index(adj_buf, i), value, adj_value);
|
|
1061
1061
|
else if (buf.grad)
|
|
@@ -1064,7 +1064,7 @@ inline CUDA_CALLABLE void adj_atomic_max(const A1<T>& buf, int i, T value, const
|
|
|
1064
1064
|
FP_VERIFY_ADJ_1(value, adj_value)
|
|
1065
1065
|
}
|
|
1066
1066
|
template<template<typename> class A1, template<typename> class A2, typename T>
|
|
1067
|
-
inline CUDA_CALLABLE void adj_atomic_max(const A1<T>& buf, int i, int j, T value, const A2<T>& adj_buf, int
|
|
1067
|
+
inline CUDA_CALLABLE void adj_atomic_max(const A1<T>& buf, int i, int j, T value, const A2<T>& adj_buf, int adj_i, int adj_j, T& adj_value, const T& adj_ret) {
|
|
1068
1068
|
if (adj_buf.data)
|
|
1069
1069
|
adj_atomic_minmax(&index(buf, i, j), &index(adj_buf, i, j), value, adj_value);
|
|
1070
1070
|
else if (buf.grad)
|
|
@@ -1073,7 +1073,7 @@ inline CUDA_CALLABLE void adj_atomic_max(const A1<T>& buf, int i, int j, T value
|
|
|
1073
1073
|
FP_VERIFY_ADJ_2(value, adj_value)
|
|
1074
1074
|
}
|
|
1075
1075
|
template<template<typename> class A1, template<typename> class A2, typename T>
|
|
1076
|
-
inline CUDA_CALLABLE void adj_atomic_max(const A1<T>& buf, int i, int j, int k, T value, const A2<T>& adj_buf, int
|
|
1076
|
+
inline CUDA_CALLABLE void adj_atomic_max(const A1<T>& buf, int i, int j, int k, T value, const A2<T>& adj_buf, int adj_i, int adj_j, int adj_k, T& adj_value, const T& adj_ret) {
|
|
1077
1077
|
if (adj_buf.data)
|
|
1078
1078
|
adj_atomic_minmax(&index(buf, i, j, k), &index(adj_buf, i, j, k), value, adj_value);
|
|
1079
1079
|
else if (buf.grad)
|
|
@@ -1082,7 +1082,7 @@ inline CUDA_CALLABLE void adj_atomic_max(const A1<T>& buf, int i, int j, int k,
|
|
|
1082
1082
|
FP_VERIFY_ADJ_3(value, adj_value)
|
|
1083
1083
|
}
|
|
1084
1084
|
template<template<typename> class A1, template<typename> class A2, typename T>
|
|
1085
|
-
inline CUDA_CALLABLE void adj_atomic_max(const A1<T>& buf, int i, int j, int k, int l, T value, const A2<T>& adj_buf, int
|
|
1085
|
+
inline CUDA_CALLABLE void adj_atomic_max(const A1<T>& buf, int i, int j, int k, int l, T value, const A2<T>& adj_buf, int adj_i, int adj_j, int adj_k, int adj_l, T& adj_value, const T& adj_ret) {
|
|
1086
1086
|
if (adj_buf.data)
|
|
1087
1087
|
adj_atomic_minmax(&index(buf, i, j, k, l), &index(adj_buf, i, j, k, l), value, adj_value);
|
|
1088
1088
|
else if (buf.grad)
|
warp/native/builtin.h
CHANGED
|
@@ -1575,32 +1575,73 @@ inline CUDA_CALLABLE void print(transform_t<Type> t)
|
|
|
1575
1575
|
printf("(%g %g %g) (%g %g %g %g)\n", float(t.p[0]), float(t.p[1]), float(t.p[2]), float(t.q.x), float(t.q.y), float(t.q.z), float(t.q.w));
|
|
1576
1576
|
}
|
|
1577
1577
|
|
|
1578
|
-
|
|
1579
|
-
inline CUDA_CALLABLE void adj_print(
|
|
1580
|
-
|
|
1581
|
-
|
|
1582
|
-
|
|
1583
|
-
|
|
1584
|
-
|
|
1585
|
-
inline CUDA_CALLABLE void adj_print(
|
|
1586
|
-
inline CUDA_CALLABLE void adj_print(
|
|
1587
|
-
inline CUDA_CALLABLE void adj_print(
|
|
1588
|
-
|
|
1578
|
+
template<typename T>
|
|
1579
|
+
inline CUDA_CALLABLE void adj_print(const T& x, const T& adj_x)
|
|
1580
|
+
{
|
|
1581
|
+
printf("adj: <type without print implementation>\n");
|
|
1582
|
+
}
|
|
1583
|
+
|
|
1584
|
+
// note: adj_print() only prints the adjoint value, since the value itself gets printed in replay print()
|
|
1585
|
+
inline CUDA_CALLABLE void adj_print(half x, half adj_x) { printf("adj: %g\n", half_to_float(adj_x)); }
|
|
1586
|
+
inline CUDA_CALLABLE void adj_print(float x, float adj_x) { printf("adj: %g\n", adj_x); }
|
|
1587
|
+
inline CUDA_CALLABLE void adj_print(double x, double adj_x) { printf("adj: %g\n", adj_x); }
|
|
1588
|
+
|
|
1589
|
+
inline CUDA_CALLABLE void adj_print(signed char x, signed char adj_x) { printf("adj: %d\n", adj_x); }
|
|
1590
|
+
inline CUDA_CALLABLE void adj_print(short x, short adj_x) { printf("adj: %d\n", adj_x); }
|
|
1591
|
+
inline CUDA_CALLABLE void adj_print(int x, int adj_x) { printf("adj: %d\n", adj_x); }
|
|
1592
|
+
inline CUDA_CALLABLE void adj_print(long x, long adj_x) { printf("adj: %ld\n", adj_x); }
|
|
1593
|
+
inline CUDA_CALLABLE void adj_print(long long x, long long adj_x) { printf("adj: %lld\n", adj_x); }
|
|
1594
|
+
|
|
1595
|
+
inline CUDA_CALLABLE void adj_print(unsigned char x, unsigned char adj_x) { printf("adj: %u\n", adj_x); }
|
|
1596
|
+
inline CUDA_CALLABLE void adj_print(unsigned short x, unsigned short adj_x) { printf("adj: %u\n", adj_x); }
|
|
1597
|
+
inline CUDA_CALLABLE void adj_print(unsigned x, unsigned adj_x) { printf("adj: %u\n", adj_x); }
|
|
1598
|
+
inline CUDA_CALLABLE void adj_print(unsigned long x, unsigned long adj_x) { printf("adj: %lu\n", adj_x); }
|
|
1599
|
+
inline CUDA_CALLABLE void adj_print(unsigned long long x, unsigned long long adj_x) { printf("adj: %llu\n", adj_x); }
|
|
1600
|
+
|
|
1601
|
+
inline CUDA_CALLABLE void adj_print(bool x, bool adj_x) { printf("adj: %s\n", (adj_x ? "True" : "False")); }
|
|
1589
1602
|
|
|
1590
1603
|
template<unsigned Length, typename Type>
|
|
1591
|
-
inline CUDA_CALLABLE void adj_print(vec_t<Length, Type
|
|
1604
|
+
inline CUDA_CALLABLE void adj_print(const vec_t<Length, Type>& v, const vec_t<Length, Type>& adj_v)
|
|
1605
|
+
{
|
|
1606
|
+
printf("adj:");
|
|
1607
|
+
for (unsigned i = 0; i < Length; i++)
|
|
1608
|
+
printf(" %g", float(adj_v[i]));
|
|
1609
|
+
printf("\n");
|
|
1610
|
+
}
|
|
1592
1611
|
|
|
1593
1612
|
template<unsigned Rows, unsigned Cols, typename Type>
|
|
1594
|
-
inline CUDA_CALLABLE void adj_print(mat_t<Rows, Cols, Type
|
|
1613
|
+
inline CUDA_CALLABLE void adj_print(const mat_t<Rows, Cols, Type>& m, const mat_t<Rows, Cols, Type>& adj_m)
|
|
1614
|
+
{
|
|
1615
|
+
for (unsigned i = 0; i < Rows; i++)
|
|
1616
|
+
{
|
|
1617
|
+
if (i == 0)
|
|
1618
|
+
printf("adj:");
|
|
1619
|
+
else
|
|
1620
|
+
printf(" ");
|
|
1621
|
+
for (unsigned j = 0; j < Cols; j++)
|
|
1622
|
+
printf(" %g", float(adj_m.data[i][j]));
|
|
1623
|
+
printf("\n");
|
|
1624
|
+
}
|
|
1625
|
+
}
|
|
1595
1626
|
|
|
1596
1627
|
template<typename Type>
|
|
1597
|
-
inline CUDA_CALLABLE void adj_print(quat_t<Type
|
|
1628
|
+
inline CUDA_CALLABLE void adj_print(const quat_t<Type>& q, const quat_t<Type>& adj_q)
|
|
1629
|
+
{
|
|
1630
|
+
printf("adj: %g %g %g %g\n", float(adj_q.x), float(adj_q.y), float(adj_q.z), float(adj_q.w));
|
|
1631
|
+
}
|
|
1598
1632
|
|
|
1599
1633
|
template<typename Type>
|
|
1600
|
-
inline CUDA_CALLABLE void adj_print(transform_t<Type
|
|
1601
|
-
|
|
1602
|
-
|
|
1634
|
+
inline CUDA_CALLABLE void adj_print(const transform_t<Type>& t, const transform_t<Type>& adj_t)
|
|
1635
|
+
{
|
|
1636
|
+
printf("adj: (%g %g %g) (%g %g %g %g)\n",
|
|
1637
|
+
float(adj_t.p[0]), float(adj_t.p[1]), float(adj_t.p[2]),
|
|
1638
|
+
float(adj_t.q.x), float(adj_t.q.y), float(adj_t.q.z), float(adj_t.q.w));
|
|
1639
|
+
}
|
|
1603
1640
|
|
|
1641
|
+
inline CUDA_CALLABLE void adj_print(str t, str& adj_t)
|
|
1642
|
+
{
|
|
1643
|
+
printf("adj: %s\n", t);
|
|
1644
|
+
}
|
|
1604
1645
|
|
|
1605
1646
|
template <typename T>
|
|
1606
1647
|
inline CUDA_CALLABLE void expect_eq(const T& actual, const T& expected)
|
warp/native/bvh.cu
CHANGED
|
@@ -65,7 +65,7 @@ __global__ void bvh_refit_kernel(int n, const int* __restrict__ parents, int* __
|
|
|
65
65
|
int finished = atomicAdd(&child_count[parent], 1);
|
|
66
66
|
|
|
67
67
|
// if we are the last thread (such that the parent node is now complete)
|
|
68
|
-
// then update its bounds and move onto the
|
|
68
|
+
// then update its bounds and move onto the next parent in the hierarchy
|
|
69
69
|
if (finished == 1)
|
|
70
70
|
{
|
|
71
71
|
const int left_child = node_lowers[parent].i;
|
|
@@ -273,7 +273,7 @@ __global__ void build_hierarchy(int n, int* root, const int* __restrict__ deltas
|
|
|
273
273
|
}
|
|
274
274
|
|
|
275
275
|
// if we are the last thread (such that the parent node is now complete)
|
|
276
|
-
// then update its bounds and move onto the
|
|
276
|
+
// then update its bounds and move onto the next parent in the hierarchy
|
|
277
277
|
if (childCount == 1)
|
|
278
278
|
{
|
|
279
279
|
const int left_child = lowers[parent].i;
|
warp/native/exports.h
CHANGED
|
@@ -1013,6 +1013,23 @@ WP_API void builtin_volume_index_to_world_uint64_vec3f(uint64 id, vec3f& uvw, ve
|
|
|
1013
1013
|
WP_API void builtin_volume_world_to_index_uint64_vec3f(uint64 id, vec3f& xyz, vec3f* ret) { *ret = wp::volume_world_to_index(id, xyz); }
|
|
1014
1014
|
WP_API void builtin_volume_index_to_world_dir_uint64_vec3f(uint64 id, vec3f& uvw, vec3f* ret) { *ret = wp::volume_index_to_world_dir(id, uvw); }
|
|
1015
1015
|
WP_API void builtin_volume_world_to_index_dir_uint64_vec3f(uint64 id, vec3f& xyz, vec3f* ret) { *ret = wp::volume_world_to_index_dir(id, xyz); }
|
|
1016
|
+
WP_API void builtin_rand_init_int32(int32 seed, uint32* ret) { *ret = wp::rand_init(seed); }
|
|
1017
|
+
WP_API void builtin_rand_init_int32_int32(int32 seed, int32 offset, uint32* ret) { *ret = wp::rand_init(seed, offset); }
|
|
1018
|
+
WP_API void builtin_randi_uint32(uint32 state, int* ret) { *ret = wp::randi(state); }
|
|
1019
|
+
WP_API void builtin_randi_uint32_int32_int32(uint32 state, int32 low, int32 high, int* ret) { *ret = wp::randi(state, low, high); }
|
|
1020
|
+
WP_API void builtin_randf_uint32(uint32 state, float* ret) { *ret = wp::randf(state); }
|
|
1021
|
+
WP_API void builtin_randf_uint32_float32_float32(uint32 state, float32 low, float32 high, float* ret) { *ret = wp::randf(state, low, high); }
|
|
1022
|
+
WP_API void builtin_randn_uint32(uint32 state, float* ret) { *ret = wp::randn(state); }
|
|
1023
|
+
WP_API void builtin_sample_triangle_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_triangle(state); }
|
|
1024
|
+
WP_API void builtin_sample_unit_ring_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_unit_ring(state); }
|
|
1025
|
+
WP_API void builtin_sample_unit_disk_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_unit_disk(state); }
|
|
1026
|
+
WP_API void builtin_sample_unit_sphere_surface_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_sphere_surface(state); }
|
|
1027
|
+
WP_API void builtin_sample_unit_sphere_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_sphere(state); }
|
|
1028
|
+
WP_API void builtin_sample_unit_hemisphere_surface_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_hemisphere_surface(state); }
|
|
1029
|
+
WP_API void builtin_sample_unit_hemisphere_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_hemisphere(state); }
|
|
1030
|
+
WP_API void builtin_sample_unit_square_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_unit_square(state); }
|
|
1031
|
+
WP_API void builtin_sample_unit_cube_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_cube(state); }
|
|
1032
|
+
WP_API void builtin_poisson_uint32_float32(uint32 state, float32 lam, uint32* ret) { *ret = wp::poisson(state, lam); }
|
|
1016
1033
|
WP_API void builtin_noise_uint32_float32(uint32 state, float32 x, float* ret) { *ret = wp::noise(state, x); }
|
|
1017
1034
|
WP_API void builtin_noise_uint32_vec2f(uint32 state, vec2f& xy, float* ret) { *ret = wp::noise(state, xy); }
|
|
1018
1035
|
WP_API void builtin_noise_uint32_vec3f(uint32 state, vec3f& xyz, float* ret) { *ret = wp::noise(state, xyz); }
|
warp/native/mesh.cu
CHANGED
|
@@ -101,7 +101,7 @@ __global__ void bvh_refit_with_solid_angle_kernel(int n, const int* __restrict__
|
|
|
101
101
|
int finished = atomicAdd(&child_count[parent], 1);
|
|
102
102
|
|
|
103
103
|
// if we are the last thread (such that the parent node is now complete)
|
|
104
|
-
// then update its bounds and move onto the
|
|
104
|
+
// then update its bounds and move onto the next parent in the hierarchy
|
|
105
105
|
if (finished == 1)
|
|
106
106
|
{
|
|
107
107
|
//printf("Compute non-leaf at %d\n", index);
|
|
@@ -340,4 +340,4 @@ void mesh_set_velocities_device(uint64_t id, wp::array_t<wp::vec3> velocities)
|
|
|
340
340
|
fprintf(stderr, "The mesh id provided to mesh_set_velocities_device is not valid!\n");
|
|
341
341
|
return;
|
|
342
342
|
}
|
|
343
|
-
}
|
|
343
|
+
}
|
warp/native/range.h
CHANGED
|
@@ -97,8 +97,17 @@ CUDA_CALLABLE inline range_t iter_reverse(const range_t& r)
|
|
|
97
97
|
{
|
|
98
98
|
// generates a reverse range, equivalent to reversed(range())
|
|
99
99
|
range_t rev;
|
|
100
|
-
|
|
101
|
-
|
|
100
|
+
|
|
101
|
+
if (r.step > 0)
|
|
102
|
+
{
|
|
103
|
+
rev.start = r.start + int((r.end - r.start - 1) / r.step) * r.step;
|
|
104
|
+
}
|
|
105
|
+
else
|
|
106
|
+
{
|
|
107
|
+
rev.start = r.start + int((r.end - r.start + 1) / r.step) * r.step;
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
rev.end = r.start - r.step;
|
|
102
111
|
rev.step = -r.step;
|
|
103
112
|
|
|
104
113
|
rev.i = rev.start;
|
warp/sim/integrator_xpbd.py
CHANGED
|
@@ -2808,12 +2808,8 @@ class XPBDIntegrator(Integrator):
|
|
|
2808
2808
|
|
|
2809
2809
|
with wp.ScopedTimer("simulate", False):
|
|
2810
2810
|
if model.particle_count:
|
|
2811
|
-
|
|
2812
|
-
|
|
2813
|
-
particle_qd = state_out.particle_qd
|
|
2814
|
-
else:
|
|
2815
|
-
particle_q = state_out.particle_q
|
|
2816
|
-
particle_qd = state_out.particle_qd
|
|
2811
|
+
particle_q = state_out.particle_q
|
|
2812
|
+
particle_qd = state_out.particle_qd
|
|
2817
2813
|
|
|
2818
2814
|
self.particle_q_init = wp.clone(state_in.particle_q)
|
|
2819
2815
|
if self.enable_restitution:
|
warp/sim/model.py
CHANGED
|
@@ -641,7 +641,7 @@ class Model:
|
|
|
641
641
|
joint_dof_count (int): Total number of velocity degrees of freedom of all joints in the system
|
|
642
642
|
joint_coord_count (int): Total number of position degrees of freedom of all joints in the system
|
|
643
643
|
|
|
644
|
-
particle_coloring (list of array): The coloring of all the particles, used for VBD's Gauss-Seidel
|
|
644
|
+
particle_coloring (list of array): The coloring of all the particles, used for VBD's Gauss-Seidel iteration.
|
|
645
645
|
|
|
646
646
|
device (wp.Device): Device on which the Model was allocated
|
|
647
647
|
|
|
@@ -1404,9 +1404,8 @@ class ModelBuilder:
|
|
|
1404
1404
|
self.joint_X_p.extend(joint_X_p)
|
|
1405
1405
|
self.joint_q.extend(joint_q)
|
|
1406
1406
|
|
|
1407
|
-
self.add_articulation()
|
|
1408
|
-
|
|
1409
1407
|
# offset the indices
|
|
1408
|
+
self.articulation_start.extend([a + self.joint_count for a in builder.articulation_start])
|
|
1410
1409
|
self.joint_parent.extend([p + self.joint_count if p != -1 else -1 for p in builder.joint_parent])
|
|
1411
1410
|
self.joint_child.extend([c + self.joint_count for c in builder.joint_child])
|
|
1412
1411
|
|
|
@@ -4061,7 +4060,7 @@ class ModelBuilder:
|
|
|
4061
4060
|
radius_mean: float = default_particle_radius,
|
|
4062
4061
|
radius_std: float = 0.0,
|
|
4063
4062
|
):
|
|
4064
|
-
rng = np.random.default_rng()
|
|
4063
|
+
rng = np.random.default_rng(42)
|
|
4065
4064
|
for z in range(dim_z):
|
|
4066
4065
|
for y in range(dim_y):
|
|
4067
4066
|
for x in range(dim_x):
|
|
@@ -4071,7 +4070,7 @@ class ModelBuilder:
|
|
|
4071
4070
|
p = wp.quat_rotate(rot, v) + pos + wp.vec3(rng.random(3) * jitter)
|
|
4072
4071
|
|
|
4073
4072
|
if radius_std > 0.0:
|
|
4074
|
-
r = radius_mean +
|
|
4073
|
+
r = radius_mean + rng.standard_normal() * radius_std
|
|
4075
4074
|
else:
|
|
4076
4075
|
r = radius_mean
|
|
4077
4076
|
self.add_particle(p, vel, m, r)
|