warp-lang 1.5.1-py3-none-win_amd64.whl → 1.6.1-py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- warp/__init__.py +5 -0
- warp/autograd.py +414 -191
- warp/bin/warp-clang.dll +0 -0
- warp/bin/warp.dll +0 -0
- warp/build.py +40 -12
- warp/build_dll.py +13 -6
- warp/builtins.py +1077 -481
- warp/codegen.py +250 -122
- warp/config.py +65 -21
- warp/context.py +500 -149
- warp/examples/assets/square_cloth.usd +0 -0
- warp/examples/benchmarks/benchmark_gemm.py +27 -18
- warp/examples/benchmarks/benchmark_interop_paddle.py +3 -3
- warp/examples/benchmarks/benchmark_interop_torch.py +3 -3
- warp/examples/core/example_marching_cubes.py +1 -1
- warp/examples/core/example_mesh.py +1 -1
- warp/examples/core/example_torch.py +18 -34
- warp/examples/core/example_wave.py +1 -1
- warp/examples/fem/example_apic_fluid.py +1 -0
- warp/examples/fem/example_mixed_elasticity.py +1 -1
- warp/examples/optim/example_bounce.py +1 -1
- warp/examples/optim/example_cloth_throw.py +1 -1
- warp/examples/optim/example_diffray.py +4 -15
- warp/examples/optim/example_drone.py +1 -1
- warp/examples/optim/example_softbody_properties.py +392 -0
- warp/examples/optim/example_trajectory.py +1 -3
- warp/examples/optim/example_walker.py +5 -0
- warp/examples/sim/example_cartpole.py +0 -2
- warp/examples/sim/example_cloth_self_contact.py +314 -0
- warp/examples/sim/example_granular_collision_sdf.py +4 -5
- warp/examples/sim/example_jacobian_ik.py +0 -2
- warp/examples/sim/example_quadruped.py +5 -2
- warp/examples/tile/example_tile_cholesky.py +79 -0
- warp/examples/tile/example_tile_convolution.py +2 -2
- warp/examples/tile/example_tile_fft.py +2 -2
- warp/examples/tile/example_tile_filtering.py +3 -3
- warp/examples/tile/example_tile_matmul.py +4 -4
- warp/examples/tile/example_tile_mlp.py +12 -12
- warp/examples/tile/example_tile_nbody.py +191 -0
- warp/examples/tile/example_tile_walker.py +319 -0
- warp/math.py +147 -0
- warp/native/array.h +12 -0
- warp/native/builtin.h +0 -1
- warp/native/bvh.cpp +149 -70
- warp/native/bvh.cu +287 -68
- warp/native/bvh.h +195 -85
- warp/native/clang/clang.cpp +6 -2
- warp/native/crt.h +1 -0
- warp/native/cuda_util.cpp +35 -0
- warp/native/cuda_util.h +5 -0
- warp/native/exports.h +40 -40
- warp/native/intersect.h +17 -0
- warp/native/mat.h +57 -3
- warp/native/mathdx.cpp +19 -0
- warp/native/mesh.cpp +25 -8
- warp/native/mesh.cu +153 -101
- warp/native/mesh.h +482 -403
- warp/native/quat.h +40 -0
- warp/native/solid_angle.h +7 -0
- warp/native/sort.cpp +85 -0
- warp/native/sort.cu +34 -0
- warp/native/sort.h +3 -1
- warp/native/spatial.h +11 -0
- warp/native/tile.h +1189 -664
- warp/native/tile_reduce.h +8 -6
- warp/native/vec.h +41 -0
- warp/native/warp.cpp +8 -1
- warp/native/warp.cu +263 -40
- warp/native/warp.h +19 -5
- warp/optim/linear.py +22 -4
- warp/render/render_opengl.py +132 -59
- warp/render/render_usd.py +10 -2
- warp/sim/__init__.py +6 -1
- warp/sim/collide.py +289 -32
- warp/sim/import_urdf.py +20 -5
- warp/sim/integrator_euler.py +25 -7
- warp/sim/integrator_featherstone.py +147 -35
- warp/sim/integrator_vbd.py +842 -40
- warp/sim/model.py +173 -112
- warp/sim/render.py +2 -2
- warp/stubs.py +249 -116
- warp/tape.py +28 -30
- warp/tests/aux_test_module_unload.py +15 -0
- warp/tests/{test_sim_grad.py → flaky_test_sim_grad.py} +104 -63
- warp/tests/test_array.py +100 -0
- warp/tests/test_assert.py +242 -0
- warp/tests/test_codegen.py +14 -61
- warp/tests/test_collision.py +8 -8
- warp/tests/test_examples.py +16 -1
- warp/tests/test_grad_debug.py +87 -2
- warp/tests/test_hash_grid.py +1 -1
- warp/tests/test_ipc.py +116 -0
- warp/tests/test_launch.py +77 -26
- warp/tests/test_mat.py +213 -168
- warp/tests/test_math.py +47 -1
- warp/tests/test_matmul.py +11 -7
- warp/tests/test_matmul_lite.py +4 -4
- warp/tests/test_mesh.py +84 -60
- warp/tests/test_mesh_query_aabb.py +165 -0
- warp/tests/test_mesh_query_point.py +328 -286
- warp/tests/test_mesh_query_ray.py +134 -121
- warp/tests/test_mlp.py +2 -2
- warp/tests/test_operators.py +43 -0
- warp/tests/test_overwrite.py +6 -5
- warp/tests/test_quat.py +77 -0
- warp/tests/test_reload.py +29 -0
- warp/tests/test_sim_grad_bounce_linear.py +204 -0
- warp/tests/test_static.py +16 -0
- warp/tests/test_tape.py +25 -0
- warp/tests/test_tile.py +134 -191
- warp/tests/test_tile_load.py +399 -0
- warp/tests/test_tile_mathdx.py +61 -8
- warp/tests/test_tile_mlp.py +17 -17
- warp/tests/test_tile_reduce.py +24 -18
- warp/tests/test_tile_shared_memory.py +66 -17
- warp/tests/test_tile_view.py +165 -0
- warp/tests/test_torch.py +35 -0
- warp/tests/test_utils.py +36 -24
- warp/tests/test_vec.py +110 -0
- warp/tests/unittest_suites.py +29 -4
- warp/tests/unittest_utils.py +30 -11
- warp/thirdparty/unittest_parallel.py +5 -2
- warp/types.py +419 -111
- warp/utils.py +9 -5
- {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/METADATA +86 -45
- {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/RECORD +129 -118
- {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/WHEEL +1 -1
- warp/examples/benchmarks/benchmark_tile.py +0 -179
- warp/native/tile_gemm.h +0 -341
- {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/LICENSE.md +0 -0
- {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/top_level.txt +0 -0
warp/native/quat.h
CHANGED
@@ -487,6 +487,37 @@ inline CUDA_CALLABLE void adj_indexref(quat_t<Type>* q, int idx,
     // nop
 }
 
+
+template<typename Type>
+inline CUDA_CALLABLE void augassign_add(quat_t<Type>& q, int idx, Type value)
+{
+    q[idx] += value;
+}
+
+
+template<typename Type>
+inline CUDA_CALLABLE void adj_augassign_add(quat_t<Type>& q, int idx, Type value,
+                                            quat_t<Type>& adj_q, int adj_idx, Type& adj_value)
+{
+    adj_value += adj_q[idx];
+}
+
+
+template<typename Type>
+inline CUDA_CALLABLE void augassign_sub(quat_t<Type>& q, int idx, Type value)
+{
+    q[idx] -= value;
+}
+
+
+template<typename Type>
+inline CUDA_CALLABLE void adj_augassign_sub(quat_t<Type>& q, int idx, Type value,
+                                            quat_t<Type>& adj_q, int adj_idx, Type& adj_value)
+{
+    adj_value -= adj_q[idx];
+}
+
+
 template<typename Type>
 inline CUDA_CALLABLE quat_t<Type> assign(quat_t<Type>& q, int idx, Type value)
 {
@@ -1229,6 +1260,15 @@ inline CUDA_CALLABLE quat_t<Type> quat_identity()
     return quat_t<Type>(Type(0), Type(0), Type(0), Type(1));
 }
 
+template<typename Type>
+CUDA_CALLABLE inline int len(const quat_t<Type>& x)
+{
+    return 4;
+}
 
+template<typename Type>
+CUDA_CALLABLE inline void adj_len(const quat_t<Type>& x, quat_t<Type>& adj_x, const int& adj_ret)
+{
+}
 
 } // namespace wp
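The quat.h hunks above add in-place component updates (q[idx] += value, q[idx] -= value) together with their reverse-mode adjoints, plus a constant len() that reports the four components. Below is a minimal standalone C++ sketch of the augassign_add / adj_augassign_add pairing; MyQuat and the main() harness are illustrative stand-ins for wp::quat_t, not Warp code.

#include <cstdio>

// illustrative 4-component stand-in for wp::quat_t<float>
struct MyQuat
{
    float c[4];
    float& operator[](int i) { return c[i]; }
    float operator[](int i) const { return c[i]; }
};

// forward: q[idx] += value
inline void augassign_add(MyQuat& q, int idx, float value)
{
    q[idx] += value;
}

// reverse-mode adjoint of the in-place add: d(q[idx] + value)/d(value) = 1,
// so the gradient stored for component idx flows into adj_value; adj_q needs
// no extra accumulation because d(q_new[idx])/d(q_old[idx]) = 1 and the
// component shares the same storage before and after the update.
inline void adj_augassign_add(MyQuat& q, int idx, float value,
                              MyQuat& adj_q, int adj_idx, float& adj_value)
{
    adj_value += adj_q[idx];
}

int main()
{
    MyQuat q = {{0.f, 0.f, 0.f, 1.f}};
    augassign_add(q, 2, 0.5f);              // forward: q.z += 0.5

    MyQuat adj_q = {{0.f, 0.f, 1.f, 0.f}};  // incoming gradient w.r.t. q
    float adj_value = 0.f;
    adj_augassign_add(q, 2, 0.5f, adj_q, 2, adj_value);

    printf("q[2] = %g, adj_value = %g\n", q[2], adj_value);  // 0.5, 1
    return 0;
}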
warp/native/solid_angle.h
CHANGED
@@ -357,6 +357,13 @@ CUDA_CALLABLE inline void combine_precomputed_solid_angle_props(SolidAngleProps
     my_data.max_p_dist_sq = length_sq(max(my_data.average_p - my_data.box.lower, my_data.box.upper - my_data.average_p));
 }
 
+CUDA_CALLABLE inline SolidAngleProps combine_precomputed_solid_angle_props(const SolidAngleProps* left_child_data, const SolidAngleProps* right_child_data)
+{
+    SolidAngleProps my_data;
+    combine_precomputed_solid_angle_props(my_data, left_child_data, right_child_data);
+    return my_data;
+}
+
 // Return whether need to
 CUDA_CALLABLE inline bool evaluate_node_solid_angle(const vec3 &query_point, SolidAngleProps *current_data, float &solid_angle, const float accuracy_scale_sq)
 {
warp/native/sort.cpp
CHANGED
@@ -77,12 +77,90 @@ void radix_sort_pairs_host(int* keys, int* values, int n)
     }
 }
 
+//http://stereopsis.com/radix.html
+inline unsigned int radix_float_to_int(float f)
+{
+    unsigned int i = reinterpret_cast<unsigned int&>(f);
+    unsigned int mask = (unsigned int)(-(int)(i >> 31)) | 0x80000000;
+    return i ^ mask;
+}
+
+void radix_sort_pairs_host(float* keys, int* values, int n)
+{
+    static unsigned int tables[2][1 << 16];
+    memset(tables, 0, sizeof(tables));
+
+    float* auxKeys = keys + n;
+    int* auxValues = values + n;
+
+    // build histograms
+    for (int i=0; i < n; ++i)
+    {
+        const unsigned int k = radix_float_to_int(keys[i]);
+        const unsigned short low = k & 0xffff;
+        const unsigned short high = k >> 16;
+
+        ++tables[0][low];
+        ++tables[1][high];
+    }
+
+    // convert histograms to offset tables in-place
+    unsigned int offlow = 0;
+    unsigned int offhigh = 0;
+
+    for (int i=0; i < 65536; ++i)
+    {
+        const unsigned int newofflow = offlow + tables[0][i];
+        const unsigned int newoffhigh = offhigh + tables[1][i];
+
+        tables[0][i] = offlow;
+        tables[1][i] = offhigh;
+
+        offlow = newofflow;
+        offhigh = newoffhigh;
+    }
+
+    // pass 1 - sort by low 16 bits
+    for (int i=0; i < n; ++i)
+    {
+        // lookup offset of input
+        const float f = keys[i];
+        const unsigned int k = radix_float_to_int(f);
+        const int v = values[i];
+        const unsigned int b = k & 0xffff;
+
+        // find offset and increment
+        const unsigned int offset = tables[0][b]++;
+
+        auxKeys[offset] = f;
+        auxValues[offset] = v;
+    }
+
+    // pass 2 - sort by high 16 bits
+    for (int i=0; i < n; ++i)
+    {
+        // lookup offset of input
+        const float f = auxKeys[i];
+        const unsigned int k = radix_float_to_int(f);
+        const int v = auxValues[i];
+
+        const unsigned int b = k >> 16;
+
+        const unsigned int offset = tables[1][b]++;
+
+        keys[offset] = f;
+        values[offset] = v;
+    }
+}
+
 #if !WP_ENABLE_CUDA
 
 void radix_sort_reserve(void* context, int n, void** mem_out, size_t* size_out) {}
 
 void radix_sort_pairs_int_device(uint64_t keys, uint64_t values, int n) {}
 
+void radix_sort_pairs_float_device(uint64_t keys, uint64_t values, int n) {}
+
 #endif // !WP_ENABLE_CUDA
 
 
@@ -92,3 +170,10 @@ void radix_sort_pairs_int_host(uint64_t keys, uint64_t values, int n)
         reinterpret_cast<int *>(keys),
         reinterpret_cast<int *>(values), n);
 }
+
+void radix_sort_pairs_float_host(uint64_t keys, uint64_t values, int n)
+{
+    radix_sort_pairs_host(
+        reinterpret_cast<float *>(keys),
+        reinterpret_cast<int *>(values), n);
+}
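The new host-side float sort relies on the key transform credited above (stereopsis.com/radix.html): XOR-ing a float's bit pattern with a sign-dependent mask produces an unsigned integer whose ordering matches the floating-point ordering, so two 16-bit counting passes sort floats correctly. The standalone C++ check below demonstrates that property; it uses memcpy for the bit cast instead of the reinterpret_cast in the diff, and its test harness is illustrative rather than Warp code.

#include <algorithm>
#include <cstdio>
#include <cstring>
#include <vector>

inline unsigned int radix_float_to_int(float f)
{
    unsigned int i;
    std::memcpy(&i, &f, sizeof(i));            // bit pattern of the float
    // negative floats: flip all bits (their bit patterns sort in reverse);
    // positive floats: flip only the sign bit (moves them above all negatives)
    unsigned int mask = (unsigned int)(-(int)(i >> 31)) | 0x80000000u;
    return i ^ mask;
}

int main()
{
    std::vector<float> v = {-3.5f, -100.f, 0.0f, 1.25f, 42.f, -0.25f};
    std::sort(v.begin(), v.end());

    // the transformed keys must be non-decreasing whenever the floats are
    for (size_t i = 1; i < v.size(); ++i)
    {
        if (radix_float_to_int(v[i - 1]) > radix_float_to_int(v[i]))
        {
            printf("order broken at %zu\n", i);
            return 1;
        }
    }
    printf("unsigned key order matches float order\n");
    return 0;
}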
warp/native/sort.cu
CHANGED
@@ -95,3 +95,37 @@ void radix_sort_pairs_int_device(uint64_t keys, uint64_t values, int n)
         reinterpret_cast<int *>(keys),
         reinterpret_cast<int *>(values), n);
 }
+
+void radix_sort_pairs_device(void* context, float* keys, int* values, int n)
+{
+    ContextGuard guard(context);
+
+    cub::DoubleBuffer<float> d_keys(keys, keys + n);
+    cub::DoubleBuffer<int> d_values(values, values + n);
+
+    RadixSortTemp temp;
+    radix_sort_reserve(WP_CURRENT_CONTEXT, n, &temp.mem, &temp.size);
+
+    // sort
+    check_cuda(cub::DeviceRadixSort::SortPairs(
+        temp.mem,
+        temp.size,
+        d_keys,
+        d_values,
+        n, 0, 32,
+        (cudaStream_t)cuda_stream_get_current()));
+
+    if (d_keys.Current() != keys)
+        memcpy_d2d(WP_CURRENT_CONTEXT, keys, d_keys.Current(), sizeof(float)*n);
+
+    if (d_values.Current() != values)
+        memcpy_d2d(WP_CURRENT_CONTEXT, values, d_values.Current(), sizeof(int)*n);
+}
+
+void radix_sort_pairs_float_device(uint64_t keys, uint64_t values, int n)
+{
+    radix_sort_pairs_device(
+        WP_CURRENT_CONTEXT,
+        reinterpret_cast<float *>(keys),
+        reinterpret_cast<int *>(values), n);
+}
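On the device side the sort runs through cub::DeviceRadixSort with cub::DoubleBuffer arguments, so the sorted output may land in either the caller's buffer or the scratch half; the diff copies back only when Current() points at the scratch buffer. Below is a standalone C++ sketch of that conditional copy-back; the DoubleBuffer here is an illustrative stand-in for the CUB class and std::sort stands in for the GPU sort.

#include <algorithm>
#include <cstdio>
#include <cstring>

// illustrative ping-pong buffer pair, not the CUB class
template <typename T>
struct DoubleBuffer
{
    T* bufs[2];
    int selector = 0;                        // which buffer holds the current data
    DoubleBuffer(T* current, T* alternate) : bufs{current, alternate} {}
    T* Current() { return bufs[selector]; }
    T* Alternate() { return bufs[1 - selector]; }
};

int main()
{
    // primary data in the first half, scratch space in the second half
    float keys[8] = {3, 1, 2, 0, 7, 5, 6, 4};
    DoubleBuffer<float> d_keys(keys, keys + 4);

    // pretend the sort produced its output in the alternate (scratch) buffer
    std::memcpy(d_keys.Alternate(), d_keys.Current(), 4 * sizeof(float));
    std::sort(d_keys.Alternate(), d_keys.Alternate() + 4);
    d_keys.selector = 1;

    // copy back only if the sorted data did not end up in the caller's buffer
    if (d_keys.Current() != keys)
        std::memcpy(keys, d_keys.Current(), 4 * sizeof(float));

    for (int i = 0; i < 4; ++i)
        printf("%g ", keys[i]);
    printf("\n");                            // prints: 0 1 2 3
    return 0;
}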
warp/native/sort.h
CHANGED
@@ -12,4 +12,6 @@
 
 void radix_sort_reserve(void* context, int n, void** mem_out=NULL, size_t* size_out=NULL);
 void radix_sort_pairs_host(int* keys, int* values, int n);
-void
+void radix_sort_pairs_host(float* keys, int* values, int n);
+void radix_sort_pairs_device(void* context, int* keys, int* values, int n);
+void radix_sort_pairs_device(void* context, float* keys, int* values, int n);
warp/native/spatial.h
CHANGED
@@ -400,6 +400,17 @@ CUDA_CALLABLE inline void adj_lerp(const transform_t<Type>& a, const transform_t
     adj_t += tensordot(b, adj_ret) - tensordot(a, adj_ret);
 }
 
+template<typename Type>
+CUDA_CALLABLE inline int len(const transform_t<Type>& t)
+{
+    return 7;
+}
+
+template<typename Type>
+CUDA_CALLABLE inline void adj_len(const transform_t<Type>& t, transform_t<Type>& adj_t, const int& adj_ret)
+{
+}
+
 template<typename Type>
 using spatial_matrix_t = mat_t<6,6,Type>;
 