warp-lang 1.5.1__py3-none-manylinux2014_aarch64.whl → 1.6.1__py3-none-manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (131) hide show
  1. warp/__init__.py +5 -0
  2. warp/autograd.py +414 -191
  3. warp/bin/warp-clang.so +0 -0
  4. warp/bin/warp.so +0 -0
  5. warp/build.py +40 -12
  6. warp/build_dll.py +13 -6
  7. warp/builtins.py +1077 -481
  8. warp/codegen.py +250 -122
  9. warp/config.py +65 -21
  10. warp/context.py +500 -149
  11. warp/examples/assets/square_cloth.usd +0 -0
  12. warp/examples/benchmarks/benchmark_gemm.py +27 -18
  13. warp/examples/benchmarks/benchmark_interop_paddle.py +3 -3
  14. warp/examples/benchmarks/benchmark_interop_torch.py +3 -3
  15. warp/examples/core/example_marching_cubes.py +1 -1
  16. warp/examples/core/example_mesh.py +1 -1
  17. warp/examples/core/example_torch.py +18 -34
  18. warp/examples/core/example_wave.py +1 -1
  19. warp/examples/fem/example_apic_fluid.py +1 -0
  20. warp/examples/fem/example_mixed_elasticity.py +1 -1
  21. warp/examples/optim/example_bounce.py +1 -1
  22. warp/examples/optim/example_cloth_throw.py +1 -1
  23. warp/examples/optim/example_diffray.py +4 -15
  24. warp/examples/optim/example_drone.py +1 -1
  25. warp/examples/optim/example_softbody_properties.py +392 -0
  26. warp/examples/optim/example_trajectory.py +1 -3
  27. warp/examples/optim/example_walker.py +5 -0
  28. warp/examples/sim/example_cartpole.py +0 -2
  29. warp/examples/sim/example_cloth_self_contact.py +314 -0
  30. warp/examples/sim/example_granular_collision_sdf.py +4 -5
  31. warp/examples/sim/example_jacobian_ik.py +0 -2
  32. warp/examples/sim/example_quadruped.py +5 -2
  33. warp/examples/tile/example_tile_cholesky.py +79 -0
  34. warp/examples/tile/example_tile_convolution.py +2 -2
  35. warp/examples/tile/example_tile_fft.py +2 -2
  36. warp/examples/tile/example_tile_filtering.py +3 -3
  37. warp/examples/tile/example_tile_matmul.py +4 -4
  38. warp/examples/tile/example_tile_mlp.py +12 -12
  39. warp/examples/tile/example_tile_nbody.py +191 -0
  40. warp/examples/tile/example_tile_walker.py +319 -0
  41. warp/math.py +147 -0
  42. warp/native/array.h +12 -0
  43. warp/native/builtin.h +0 -1
  44. warp/native/bvh.cpp +149 -70
  45. warp/native/bvh.cu +287 -68
  46. warp/native/bvh.h +195 -85
  47. warp/native/clang/clang.cpp +6 -2
  48. warp/native/crt.h +1 -0
  49. warp/native/cuda_util.cpp +35 -0
  50. warp/native/cuda_util.h +5 -0
  51. warp/native/exports.h +40 -40
  52. warp/native/intersect.h +17 -0
  53. warp/native/mat.h +57 -3
  54. warp/native/mathdx.cpp +19 -0
  55. warp/native/mesh.cpp +25 -8
  56. warp/native/mesh.cu +153 -101
  57. warp/native/mesh.h +482 -403
  58. warp/native/quat.h +40 -0
  59. warp/native/solid_angle.h +7 -0
  60. warp/native/sort.cpp +85 -0
  61. warp/native/sort.cu +34 -0
  62. warp/native/sort.h +3 -1
  63. warp/native/spatial.h +11 -0
  64. warp/native/tile.h +1189 -664
  65. warp/native/tile_reduce.h +8 -6
  66. warp/native/vec.h +41 -0
  67. warp/native/warp.cpp +8 -1
  68. warp/native/warp.cu +263 -40
  69. warp/native/warp.h +19 -5
  70. warp/optim/linear.py +22 -4
  71. warp/render/render_opengl.py +132 -59
  72. warp/render/render_usd.py +10 -2
  73. warp/sim/__init__.py +6 -1
  74. warp/sim/collide.py +289 -32
  75. warp/sim/import_urdf.py +20 -5
  76. warp/sim/integrator_euler.py +25 -7
  77. warp/sim/integrator_featherstone.py +147 -35
  78. warp/sim/integrator_vbd.py +842 -40
  79. warp/sim/model.py +173 -112
  80. warp/sim/render.py +2 -2
  81. warp/stubs.py +249 -116
  82. warp/tape.py +28 -30
  83. warp/tests/aux_test_module_unload.py +15 -0
  84. warp/tests/{test_sim_grad.py → flaky_test_sim_grad.py} +104 -63
  85. warp/tests/test_array.py +100 -0
  86. warp/tests/test_assert.py +242 -0
  87. warp/tests/test_codegen.py +14 -61
  88. warp/tests/test_collision.py +8 -8
  89. warp/tests/test_examples.py +16 -1
  90. warp/tests/test_grad_debug.py +87 -2
  91. warp/tests/test_hash_grid.py +1 -1
  92. warp/tests/test_ipc.py +116 -0
  93. warp/tests/test_launch.py +77 -26
  94. warp/tests/test_mat.py +213 -168
  95. warp/tests/test_math.py +47 -1
  96. warp/tests/test_matmul.py +11 -7
  97. warp/tests/test_matmul_lite.py +4 -4
  98. warp/tests/test_mesh.py +84 -60
  99. warp/tests/test_mesh_query_aabb.py +165 -0
  100. warp/tests/test_mesh_query_point.py +328 -286
  101. warp/tests/test_mesh_query_ray.py +134 -121
  102. warp/tests/test_mlp.py +2 -2
  103. warp/tests/test_operators.py +43 -0
  104. warp/tests/test_overwrite.py +6 -5
  105. warp/tests/test_quat.py +77 -0
  106. warp/tests/test_reload.py +29 -0
  107. warp/tests/test_sim_grad_bounce_linear.py +204 -0
  108. warp/tests/test_static.py +16 -0
  109. warp/tests/test_tape.py +25 -0
  110. warp/tests/test_tile.py +134 -191
  111. warp/tests/test_tile_load.py +399 -0
  112. warp/tests/test_tile_mathdx.py +61 -8
  113. warp/tests/test_tile_mlp.py +17 -17
  114. warp/tests/test_tile_reduce.py +24 -18
  115. warp/tests/test_tile_shared_memory.py +66 -17
  116. warp/tests/test_tile_view.py +165 -0
  117. warp/tests/test_torch.py +35 -0
  118. warp/tests/test_utils.py +36 -24
  119. warp/tests/test_vec.py +110 -0
  120. warp/tests/unittest_suites.py +29 -4
  121. warp/tests/unittest_utils.py +30 -11
  122. warp/thirdparty/unittest_parallel.py +5 -2
  123. warp/types.py +419 -111
  124. warp/utils.py +9 -5
  125. {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/METADATA +86 -45
  126. {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/RECORD +129 -118
  127. {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/WHEEL +1 -1
  128. warp/examples/benchmarks/benchmark_tile.py +0 -179
  129. warp/native/tile_gemm.h +0 -341
  130. {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/LICENSE.md +0 -0
  131. {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/top_level.txt +0 -0
warp/native/quat.h CHANGED
@@ -487,6 +487,37 @@ inline CUDA_CALLABLE void adj_indexref(quat_t<Type>* q, int idx,
487
487
  // nop
488
488
  }
489
489
 
490
+
491
+ template<typename Type>
492
+ inline CUDA_CALLABLE void augassign_add(quat_t<Type>& q, int idx, Type value)
493
+ {
494
+ q[idx] += value;
495
+ }
496
+
497
+
498
+ template<typename Type>
499
+ inline CUDA_CALLABLE void adj_augassign_add(quat_t<Type>& q, int idx, Type value,
500
+ quat_t<Type>& adj_q, int adj_idx, Type& adj_value)
501
+ {
502
+ adj_value += adj_q[idx];
503
+ }
504
+
505
+
506
+ template<typename Type>
507
+ inline CUDA_CALLABLE void augassign_sub(quat_t<Type>& q, int idx, Type value)
508
+ {
509
+ q[idx] -= value;
510
+ }
511
+
512
+
513
+ template<typename Type>
514
+ inline CUDA_CALLABLE void adj_augassign_sub(quat_t<Type>& q, int idx, Type value,
515
+ quat_t<Type>& adj_q, int adj_idx, Type& adj_value)
516
+ {
517
+ adj_value -= adj_q[idx];
518
+ }
519
+
520
+
490
521
  template<typename Type>
491
522
  inline CUDA_CALLABLE quat_t<Type> assign(quat_t<Type>& q, int idx, Type value)
492
523
  {
@@ -1229,6 +1260,15 @@ inline CUDA_CALLABLE quat_t<Type> quat_identity()
1229
1260
  return quat_t<Type>(Type(0), Type(0), Type(0), Type(1));
1230
1261
  }
1231
1262
 
1263
+ template<typename Type>
1264
+ CUDA_CALLABLE inline int len(const quat_t<Type>& x)
1265
+ {
1266
+ return 4;
1267
+ }
1232
1268
 
1269
+ template<typename Type>
1270
+ CUDA_CALLABLE inline void adj_len(const quat_t<Type>& x, quat_t<Type>& adj_x, const int& adj_ret)
1271
+ {
1272
+ }
1233
1273
 
1234
1274
  } // namespace wp
warp/native/solid_angle.h CHANGED
@@ -357,6 +357,13 @@ CUDA_CALLABLE inline void combine_precomputed_solid_angle_props(SolidAngleProps
357
357
  my_data.max_p_dist_sq = length_sq(max(my_data.average_p - my_data.box.lower, my_data.box.upper - my_data.average_p));
358
358
  }
359
359
 
360
+ CUDA_CALLABLE inline SolidAngleProps combine_precomputed_solid_angle_props(const SolidAngleProps* left_child_data, const SolidAngleProps* right_child_data)
361
+ {
362
+ SolidAngleProps my_data;
363
+ combine_precomputed_solid_angle_props(my_data, left_child_data, right_child_data);
364
+ return my_data;
365
+ }
366
+
360
367
  // Return whether need to
361
368
  CUDA_CALLABLE inline bool evaluate_node_solid_angle(const vec3 &query_point, SolidAngleProps *current_data, float &solid_angle, const float accuracy_scale_sq)
362
369
  {
warp/native/sort.cpp CHANGED
@@ -77,12 +77,90 @@ void radix_sort_pairs_host(int* keys, int* values, int n)
77
77
  }
78
78
  }
79
79
 
80
+ //http://stereopsis.com/radix.html
81
+ inline unsigned int radix_float_to_int(float f)
82
+ {
83
+ unsigned int i = reinterpret_cast<unsigned int&>(f);
84
+ unsigned int mask = (unsigned int)(-(int)(i >> 31)) | 0x80000000;
85
+ return i ^ mask;
86
+ }
87
+
88
+ void radix_sort_pairs_host(float* keys, int* values, int n)
89
+ {
90
+ static unsigned int tables[2][1 << 16];
91
+ memset(tables, 0, sizeof(tables));
92
+
93
+ float* auxKeys = keys + n;
94
+ int* auxValues = values + n;
95
+
96
+ // build histograms
97
+ for (int i=0; i < n; ++i)
98
+ {
99
+ const unsigned int k = radix_float_to_int(keys[i]);
100
+ const unsigned short low = k & 0xffff;
101
+ const unsigned short high = k >> 16;
102
+
103
+ ++tables[0][low];
104
+ ++tables[1][high];
105
+ }
106
+
107
+ // convert histograms to offset tables in-place
108
+ unsigned int offlow = 0;
109
+ unsigned int offhigh = 0;
110
+
111
+ for (int i=0; i < 65536; ++i)
112
+ {
113
+ const unsigned int newofflow = offlow + tables[0][i];
114
+ const unsigned int newoffhigh = offhigh + tables[1][i];
115
+
116
+ tables[0][i] = offlow;
117
+ tables[1][i] = offhigh;
118
+
119
+ offlow = newofflow;
120
+ offhigh = newoffhigh;
121
+ }
122
+
123
+ // pass 1 - sort by low 16 bits
124
+ for (int i=0; i < n; ++i)
125
+ {
126
+ // lookup offset of input
127
+ const float f = keys[i];
128
+ const unsigned int k = radix_float_to_int(f);
129
+ const int v = values[i];
130
+ const unsigned int b = k & 0xffff;
131
+
132
+ // find offset and increment
133
+ const unsigned int offset = tables[0][b]++;
134
+
135
+ auxKeys[offset] = f;
136
+ auxValues[offset] = v;
137
+ }
138
+
139
+ // pass 2 - sort by high 16 bits
140
+ for (int i=0; i < n; ++i)
141
+ {
142
+ // lookup offset of input
143
+ const float f = auxKeys[i];
144
+ const unsigned int k = radix_float_to_int(f);
145
+ const int v = auxValues[i];
146
+
147
+ const unsigned int b = k >> 16;
148
+
149
+ const unsigned int offset = tables[1][b]++;
150
+
151
+ keys[offset] = f;
152
+ values[offset] = v;
153
+ }
154
+ }
155
+
80
156
  #if !WP_ENABLE_CUDA
81
157
 
82
158
  void radix_sort_reserve(void* context, int n, void** mem_out, size_t* size_out) {}
83
159
 
84
160
  void radix_sort_pairs_int_device(uint64_t keys, uint64_t values, int n) {}
85
161
 
162
+ void radix_sort_pairs_float_device(uint64_t keys, uint64_t values, int n) {}
163
+
86
164
  #endif // !WP_ENABLE_CUDA
87
165
 
88
166
 
@@ -92,3 +170,10 @@ void radix_sort_pairs_int_host(uint64_t keys, uint64_t values, int n)
92
170
  reinterpret_cast<int *>(keys),
93
171
  reinterpret_cast<int *>(values), n);
94
172
  }
173
+
174
+ void radix_sort_pairs_float_host(uint64_t keys, uint64_t values, int n)
175
+ {
176
+ radix_sort_pairs_host(
177
+ reinterpret_cast<float *>(keys),
178
+ reinterpret_cast<int *>(values), n);
179
+ }
warp/native/sort.cu CHANGED
@@ -95,3 +95,37 @@ void radix_sort_pairs_int_device(uint64_t keys, uint64_t values, int n)
95
95
  reinterpret_cast<int *>(keys),
96
96
  reinterpret_cast<int *>(values), n);
97
97
  }
98
+
99
+ void radix_sort_pairs_device(void* context, float* keys, int* values, int n)
100
+ {
101
+ ContextGuard guard(context);
102
+
103
+ cub::DoubleBuffer<float> d_keys(keys, keys + n);
104
+ cub::DoubleBuffer<int> d_values(values, values + n);
105
+
106
+ RadixSortTemp temp;
107
+ radix_sort_reserve(WP_CURRENT_CONTEXT, n, &temp.mem, &temp.size);
108
+
109
+ // sort
110
+ check_cuda(cub::DeviceRadixSort::SortPairs(
111
+ temp.mem,
112
+ temp.size,
113
+ d_keys,
114
+ d_values,
115
+ n, 0, 32,
116
+ (cudaStream_t)cuda_stream_get_current()));
117
+
118
+ if (d_keys.Current() != keys)
119
+ memcpy_d2d(WP_CURRENT_CONTEXT, keys, d_keys.Current(), sizeof(float)*n);
120
+
121
+ if (d_values.Current() != values)
122
+ memcpy_d2d(WP_CURRENT_CONTEXT, values, d_values.Current(), sizeof(int)*n);
123
+ }
124
+
125
+ void radix_sort_pairs_float_device(uint64_t keys, uint64_t values, int n)
126
+ {
127
+ radix_sort_pairs_device(
128
+ WP_CURRENT_CONTEXT,
129
+ reinterpret_cast<float *>(keys),
130
+ reinterpret_cast<int *>(values), n);
131
+ }
warp/native/sort.h CHANGED
@@ -12,4 +12,6 @@
12
12
 
13
13
  void radix_sort_reserve(void* context, int n, void** mem_out=NULL, size_t* size_out=NULL);
14
14
  void radix_sort_pairs_host(int* keys, int* values, int n);
15
- void radix_sort_pairs_device(void* context, int* keys, int* values, int n);
15
+ void radix_sort_pairs_host(float* keys, int* values, int n);
16
+ void radix_sort_pairs_device(void* context, int* keys, int* values, int n);
17
+ void radix_sort_pairs_device(void* context, float* keys, int* values, int n);
warp/native/spatial.h CHANGED
@@ -400,6 +400,17 @@ CUDA_CALLABLE inline void adj_lerp(const transform_t<Type>& a, const transform_t
400
400
  adj_t += tensordot(b, adj_ret) - tensordot(a, adj_ret);
401
401
  }
402
402
 
403
+ template<typename Type>
404
+ CUDA_CALLABLE inline int len(const transform_t<Type>& t)
405
+ {
406
+ return 7;
407
+ }
408
+
409
+ template<typename Type>
410
+ CUDA_CALLABLE inline void adj_len(const transform_t<Type>& t, transform_t<Type>& adj_t, const int& adj_ret)
411
+ {
412
+ }
413
+
403
414
  template<typename Type>
404
415
  using spatial_matrix_t = mat_t<6,6,Type>;
405
416