warp-lang 1.5.1__py3-none-manylinux2014_aarch64.whl → 1.6.0__py3-none-manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic.

Files changed (123)
  1. warp/__init__.py +5 -0
  2. warp/autograd.py +414 -191
  3. warp/bin/warp-clang.so +0 -0
  4. warp/bin/warp.so +0 -0
  5. warp/build.py +40 -12
  6. warp/build_dll.py +13 -6
  7. warp/builtins.py +1076 -480
  8. warp/codegen.py +240 -119
  9. warp/config.py +1 -1
  10. warp/context.py +298 -84
  11. warp/examples/assets/square_cloth.usd +0 -0
  12. warp/examples/benchmarks/benchmark_gemm.py +27 -18
  13. warp/examples/benchmarks/benchmark_interop_paddle.py +3 -3
  14. warp/examples/benchmarks/benchmark_interop_torch.py +3 -3
  15. warp/examples/core/example_torch.py +18 -34
  16. warp/examples/fem/example_apic_fluid.py +1 -0
  17. warp/examples/fem/example_mixed_elasticity.py +1 -1
  18. warp/examples/optim/example_bounce.py +1 -1
  19. warp/examples/optim/example_cloth_throw.py +1 -1
  20. warp/examples/optim/example_diffray.py +4 -15
  21. warp/examples/optim/example_drone.py +1 -1
  22. warp/examples/optim/example_softbody_properties.py +392 -0
  23. warp/examples/optim/example_trajectory.py +1 -3
  24. warp/examples/optim/example_walker.py +5 -0
  25. warp/examples/sim/example_cartpole.py +0 -2
  26. warp/examples/sim/example_cloth_self_contact.py +260 -0
  27. warp/examples/sim/example_granular_collision_sdf.py +4 -5
  28. warp/examples/sim/example_jacobian_ik.py +0 -2
  29. warp/examples/sim/example_quadruped.py +5 -2
  30. warp/examples/tile/example_tile_cholesky.py +79 -0
  31. warp/examples/tile/example_tile_convolution.py +2 -2
  32. warp/examples/tile/example_tile_fft.py +2 -2
  33. warp/examples/tile/example_tile_filtering.py +3 -3
  34. warp/examples/tile/example_tile_matmul.py +4 -4
  35. warp/examples/tile/example_tile_mlp.py +12 -12
  36. warp/examples/tile/example_tile_nbody.py +180 -0
  37. warp/examples/tile/example_tile_walker.py +319 -0
  38. warp/math.py +147 -0
  39. warp/native/array.h +12 -0
  40. warp/native/builtin.h +0 -1
  41. warp/native/bvh.cpp +149 -70
  42. warp/native/bvh.cu +287 -68
  43. warp/native/bvh.h +195 -85
  44. warp/native/clang/clang.cpp +5 -1
  45. warp/native/cuda_util.cpp +35 -0
  46. warp/native/cuda_util.h +5 -0
  47. warp/native/exports.h +40 -40
  48. warp/native/intersect.h +17 -0
  49. warp/native/mat.h +41 -0
  50. warp/native/mathdx.cpp +19 -0
  51. warp/native/mesh.cpp +25 -8
  52. warp/native/mesh.cu +153 -101
  53. warp/native/mesh.h +482 -403
  54. warp/native/quat.h +40 -0
  55. warp/native/solid_angle.h +7 -0
  56. warp/native/sort.cpp +85 -0
  57. warp/native/sort.cu +34 -0
  58. warp/native/sort.h +3 -1
  59. warp/native/spatial.h +11 -0
  60. warp/native/tile.h +1185 -664
  61. warp/native/tile_reduce.h +8 -6
  62. warp/native/vec.h +41 -0
  63. warp/native/warp.cpp +8 -1
  64. warp/native/warp.cu +263 -40
  65. warp/native/warp.h +19 -5
  66. warp/optim/linear.py +22 -4
  67. warp/render/render_opengl.py +124 -59
  68. warp/sim/__init__.py +6 -1
  69. warp/sim/collide.py +270 -26
  70. warp/sim/integrator_euler.py +25 -7
  71. warp/sim/integrator_featherstone.py +154 -35
  72. warp/sim/integrator_vbd.py +842 -40
  73. warp/sim/model.py +111 -53
  74. warp/stubs.py +248 -115
  75. warp/tape.py +28 -30
  76. warp/tests/aux_test_module_unload.py +15 -0
  77. warp/tests/{test_sim_grad.py → flaky_test_sim_grad.py} +104 -63
  78. warp/tests/test_array.py +74 -0
  79. warp/tests/test_assert.py +242 -0
  80. warp/tests/test_codegen.py +14 -61
  81. warp/tests/test_collision.py +2 -2
  82. warp/tests/test_examples.py +9 -0
  83. warp/tests/test_grad_debug.py +87 -2
  84. warp/tests/test_hash_grid.py +1 -1
  85. warp/tests/test_ipc.py +116 -0
  86. warp/tests/test_mat.py +138 -167
  87. warp/tests/test_math.py +47 -1
  88. warp/tests/test_matmul.py +11 -7
  89. warp/tests/test_matmul_lite.py +4 -4
  90. warp/tests/test_mesh.py +84 -60
  91. warp/tests/test_mesh_query_aabb.py +165 -0
  92. warp/tests/test_mesh_query_point.py +328 -286
  93. warp/tests/test_mesh_query_ray.py +134 -121
  94. warp/tests/test_mlp.py +2 -2
  95. warp/tests/test_operators.py +43 -0
  96. warp/tests/test_overwrite.py +2 -2
  97. warp/tests/test_quat.py +77 -0
  98. warp/tests/test_reload.py +29 -0
  99. warp/tests/test_sim_grad_bounce_linear.py +204 -0
  100. warp/tests/test_static.py +16 -0
  101. warp/tests/test_tape.py +25 -0
  102. warp/tests/test_tile.py +134 -191
  103. warp/tests/test_tile_load.py +356 -0
  104. warp/tests/test_tile_mathdx.py +61 -8
  105. warp/tests/test_tile_mlp.py +17 -17
  106. warp/tests/test_tile_reduce.py +24 -18
  107. warp/tests/test_tile_shared_memory.py +66 -17
  108. warp/tests/test_tile_view.py +165 -0
  109. warp/tests/test_torch.py +35 -0
  110. warp/tests/test_utils.py +36 -24
  111. warp/tests/test_vec.py +110 -0
  112. warp/tests/unittest_suites.py +29 -4
  113. warp/tests/unittest_utils.py +30 -11
  114. warp/thirdparty/unittest_parallel.py +2 -2
  115. warp/types.py +409 -99
  116. warp/utils.py +9 -5
  117. {warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/METADATA +68 -44
  118. {warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/RECORD +121 -110
  119. {warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/WHEEL +1 -1
  120. warp/examples/benchmarks/benchmark_tile.py +0 -179
  121. warp/native/tile_gemm.h +0 -341
  122. {warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/LICENSE.md +0 -0
  123. {warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/top_level.txt +0 -0
warp/examples/tile/example_tile_walker.py ADDED
@@ -0,0 +1,319 @@
1
+ # Copyright (c) 2025 NVIDIA CORPORATION. All rights reserved.
2
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
3
+ # and proprietary rights in and to this software, related documentation
4
+ # and any modifications thereto. Any use, reproduction, disclosure or
5
+ # distribution of this software and related documentation without an express
6
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
7
+
8
+ ###########################################################################
9
+ # Example Tile Walker
10
+ #
11
+ # Trains a tetrahedral mesh quadruped to run. Feeds 8 time-varying input
12
+ # phases as inputs into a single layer fully connected network with a tanh
13
+ # activation function. Interprets the output of the network as tet
14
+ # activations, which are fed into the wp.sim soft mesh model. This is
15
+ # simulated forward in time and then evaluated based on the center of mass
16
+ # momentum of the mesh.
17
+ #
18
+ # This example uses the Warp tile API, which as of Warp 1.6 is the
19
+ # recommended way to handle matrix multiplication. example_walker.py in
20
+ # examples/optim demonstrates the old way of doing matrix multiplication,
21
+ # wp.matmul(), which will be deprecated in a future version.
22
+ #
23
+ ###########################################################################
24
+
25
+ import math
26
+ import os
27
+
28
+ import numpy as np
29
+ from pxr import Gf, Usd, UsdGeom
30
+
31
+ import warp as wp
32
+ import warp.examples
33
+ import warp.optim
34
+ import warp.sim
35
+ import warp.sim.render
36
+
37
+ PHASE_COUNT = 8
38
+ PHASE_STEP = wp.constant((2.0 * math.pi) / PHASE_COUNT)
39
+ PHASE_FREQ = wp.constant(5.0)
40
+ ACTIVATION_STRENGTH = wp.constant(0.3)
41
+
42
+ TILE_TETS = wp.constant(8)
43
+ TILE_THREADS = 64
44
+
45
+
46
+ @wp.kernel
47
+ def loss_kernel(com: wp.array(dtype=wp.vec3), loss: wp.array(dtype=float)):
48
+ tid = wp.tid()
49
+ vx = com[tid][0]
50
+ vy = com[tid][1]
51
+ vz = com[tid][2]
52
+ delta = wp.sqrt(vx * vx) + wp.sqrt(vy * vy) - vz
53
+
54
+ wp.atomic_add(loss, 0, delta)
55
+
56
+
57
+ @wp.kernel
58
+ def com_kernel(velocities: wp.array(dtype=wp.vec3), n: int, com: wp.array(dtype=wp.vec3)):
59
+ tid = wp.tid()
60
+ v = velocities[tid]
61
+ a = v / wp.float32(n)
62
+ wp.atomic_add(com, 0, a)
63
+
64
+
65
+ @wp.kernel
66
+ def compute_phases(phases: wp.array(dtype=float), sim_time: float):
67
+ tid = wp.tid()
68
+ phases[tid] = wp.sin(PHASE_FREQ * sim_time + wp.float32(tid) * PHASE_STEP)
69
+
70
+
71
+ @wp.func
72
+ def tanh(x: float):
73
+ return wp.tanh(x) * ACTIVATION_STRENGTH
74
+
75
+
76
+ @wp.kernel
77
+ def network(
78
+ phases: wp.array2d(dtype=float), weights: wp.array2d(dtype=float), tet_activations: wp.array2d(dtype=float)
79
+ ):
80
+ # output tile index
81
+ i = wp.tid()
82
+
83
+ # GEMM
84
+ p = wp.tile_load(phases, shape=(PHASE_COUNT, 1))
85
+ w = wp.tile_load(weights, shape=(TILE_TETS, PHASE_COUNT), offset=(i * TILE_TETS, 0))
86
+ out = wp.tile_matmul(w, p)
87
+
88
+ # activation
89
+ activations = wp.tile_map(tanh, out)
90
+ wp.tile_store(tet_activations, activations, offset=(i * TILE_TETS, 0))
91
+
92
+
93
+ class Example:
94
+ def __init__(self, stage_path="example_tile_walker.usd", verbose=False, num_frames=300):
95
+ self.verbose = verbose
96
+
97
+ fps = 60
98
+ self.frame_dt = 1.0 / fps
99
+ self.num_frames = num_frames
100
+
101
+ self.sim_substeps = 80
102
+ self.sim_dt = self.frame_dt / self.sim_substeps
103
+ self.sim_time = 0.0
104
+
105
+ self.iter = 0
106
+ self.train_rate = 0.025
107
+
108
+ self.phase_count = PHASE_COUNT
109
+
110
+ self.render_time = 0.0
111
+
112
+ # bear
113
+ asset_stage = Usd.Stage.Open(os.path.join(warp.examples.get_asset_directory(), "bear.usd"))
114
+
115
+ geom = UsdGeom.Mesh(asset_stage.GetPrimAtPath("/root/bear"))
116
+ points = geom.GetPointsAttr().Get()
117
+
118
+ xform = Gf.Matrix4f(geom.ComputeLocalToWorldTransform(0.0))
119
+ for i in range(len(points)):
120
+ points[i] = xform.Transform(points[i])
121
+
122
+ self.points = [wp.vec3(point) for point in points]
123
+ self.tet_indices = geom.GetPrim().GetAttribute("tetraIndices").Get()
124
+
125
+ # sim model
126
+ builder = wp.sim.ModelBuilder()
127
+ builder.add_soft_mesh(
128
+ pos=wp.vec3(0.0, 0.5, 0.0),
129
+ rot=wp.quat_identity(),
130
+ scale=1.0,
131
+ vel=wp.vec3(0.0, 0.0, 0.0),
132
+ vertices=self.points,
133
+ indices=self.tet_indices,
134
+ density=1.0,
135
+ k_mu=2000.0,
136
+ k_lambda=2000.0,
137
+ k_damp=2.0,
138
+ tri_ke=0.0,
139
+ tri_ka=1e-8,
140
+ tri_kd=0.0,
141
+ tri_drag=0.0,
142
+ tri_lift=0.0,
143
+ )
144
+
145
+ # finalize model
146
+ self.model = builder.finalize(requires_grad=True)
147
+ self.control = self.model.control()
148
+
149
+ self.model.soft_contact_ke = 2.0e3
150
+ self.model.soft_contact_kd = 0.1
151
+ self.model.soft_contact_kf = 10.0
152
+ self.model.soft_contact_mu = 0.7
153
+
154
+ radii = wp.zeros(self.model.particle_count, dtype=float)
155
+ radii.fill_(0.05)
156
+ self.model.particle_radius = radii
157
+ self.model.ground = True
158
+
159
+ # allocate sim states
160
+ self.states = []
161
+ for _i in range(self.num_frames * self.sim_substeps + 1):
162
+ self.states.append(self.model.state(requires_grad=True))
163
+
164
+ # initialize the integrator.
165
+ self.integrator = wp.sim.SemiImplicitIntegrator()
166
+
167
+ # model input
168
+ self.phases = []
169
+ for _i in range(self.num_frames):
170
+ self.phases.append(wp.zeros(self.phase_count, dtype=float, requires_grad=True))
171
+
172
+ # weights matrix for linear network
173
+ rng = np.random.default_rng(42)
174
+ k = 1.0 / self.phase_count
175
+ weights = rng.uniform(-np.sqrt(k), np.sqrt(k), (self.model.tet_count, self.phase_count))
176
+ self.weights = wp.array(weights, dtype=float, requires_grad=True)
177
+
178
+ # tanh activation layer array
179
+ self.tet_activations = []
180
+ for _i in range(self.num_frames):
181
+ self.tet_activations.append(wp.zeros(self.model.tet_count, dtype=float, requires_grad=True))
182
+
183
+ # optimization
184
+ self.loss = wp.zeros(1, dtype=float, requires_grad=True)
185
+ self.coms = []
186
+ for _i in range(self.num_frames):
187
+ self.coms.append(wp.zeros(1, dtype=wp.vec3, requires_grad=True))
188
+ self.optimizer = warp.optim.Adam([self.weights.flatten()], lr=self.train_rate)
189
+
190
+ # rendering
191
+ if stage_path:
192
+ self.renderer = wp.sim.render.SimRenderer(self.model, stage_path)
193
+ else:
194
+ self.renderer = None
195
+
196
+ # capture forward/backward passes
197
+ self.use_cuda_graph = wp.get_device().is_cuda
198
+ if self.use_cuda_graph:
199
+ with wp.ScopedCapture() as capture:
200
+ self.tape = wp.Tape()
201
+ with self.tape:
202
+ for i in range(self.num_frames):
203
+ self.forward(i)
204
+ self.tape.backward(self.loss)
205
+ self.graph = capture.graph
206
+
207
+ def forward(self, frame):
208
+ with wp.ScopedTimer("network", active=self.verbose):
209
+ # build sinusoidal input phases
210
+ wp.launch(kernel=compute_phases, dim=self.phase_count, inputs=[self.phases[frame], self.sim_time])
211
+
212
+ # apply linear network with tanh activation
213
+ wp.launch_tiled(
214
+ kernel=network,
215
+ dim=math.ceil(self.model.tet_count / TILE_TETS),
216
+ inputs=[self.phases[frame].reshape((self.phase_count, 1)), self.weights],
217
+ outputs=[self.tet_activations[frame].reshape((self.model.tet_count, 1))],
218
+ block_dim=TILE_THREADS,
219
+ )
220
+ self.control.tet_activations = self.tet_activations[frame]
221
+
222
+ with wp.ScopedTimer("simulate", active=self.verbose):
223
+ # run simulation loop
224
+ for i in range(self.sim_substeps):
225
+ self.states[frame * self.sim_substeps + i].clear_forces()
226
+ self.integrator.simulate(
227
+ self.model,
228
+ self.states[frame * self.sim_substeps + i],
229
+ self.states[frame * self.sim_substeps + i + 1],
230
+ self.sim_dt,
231
+ self.control,
232
+ )
233
+ self.sim_time += self.sim_dt
234
+
235
+ with wp.ScopedTimer("loss", active=self.verbose):
236
+ # compute center of mass velocity
237
+ wp.launch(
238
+ com_kernel,
239
+ dim=self.model.particle_count,
240
+ inputs=[
241
+ self.states[(frame + 1) * self.sim_substeps].particle_qd,
242
+ self.model.particle_count,
243
+ self.coms[frame],
244
+ ],
245
+ outputs=[],
246
+ )
247
+ # compute loss
248
+ wp.launch(loss_kernel, dim=1, inputs=[self.coms[frame], self.loss], outputs=[])
249
+
250
+ def step(self):
251
+ with wp.ScopedTimer("step"):
252
+ if self.use_cuda_graph:
253
+ wp.capture_launch(self.graph)
254
+ else:
255
+ self.tape = wp.Tape()
256
+ with self.tape:
257
+ for i in range(self.num_frames):
258
+ self.forward(i)
259
+ self.tape.backward(self.loss)
260
+
261
+ # optimization
262
+ x = self.weights.grad.flatten()
263
+ self.optimizer.step([x])
264
+
265
+ loss = self.loss.numpy()
266
+ if self.verbose:
267
+ print(f"Iteration {self.iter}: {loss}")
268
+
269
+ # reset sim
270
+ self.sim_time = 0.0
271
+ self.states[0] = self.model.state(requires_grad=True)
272
+
273
+ # clear grads and zero arrays for next iteration
274
+ self.tape.zero()
275
+ self.loss.zero_()
276
+ for i in range(self.num_frames):
277
+ self.coms[i].zero_()
278
+
279
+ self.iter += 1
280
+
281
+ def render(self):
282
+ if self.renderer is None:
283
+ return
284
+
285
+ with wp.ScopedTimer("render"):
286
+ for i in range(self.num_frames + 1):
287
+ self.renderer.begin_frame(self.render_time)
288
+ self.renderer.render(self.states[i * self.sim_substeps])
289
+ self.renderer.end_frame()
290
+
291
+ self.render_time += self.frame_dt
292
+
293
+
294
+ if __name__ == "__main__":
295
+ import argparse
296
+
297
+ parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
298
+ parser.add_argument("--device", type=str, default=None, help="Override the default Warp device.")
299
+ parser.add_argument(
300
+ "--stage_path",
301
+ type=lambda x: None if x == "None" else str(x),
302
+ default="example_tile_walker.usd",
303
+ help="Path to the output USD file.",
304
+ )
305
+ parser.add_argument("--num_frames", type=int, default=300, help="Total number of frames per training iteration.")
306
+ parser.add_argument("--train_iters", type=int, default=30, help="Total number of training iterations.")
307
+ parser.add_argument("--verbose", action="store_true", help="Print out additional status messages during execution.")
308
+
309
+ args = parser.parse_known_args()[0]
310
+
311
+ with wp.ScopedDevice(args.device):
312
+ example = Example(stage_path=args.stage_path, verbose=args.verbose, num_frames=args.num_frames)
313
+
314
+ for _ in range(args.train_iters):
315
+ example.step()
316
+ example.render()
317
+
318
+ if example.renderer:
319
+ example.renderer.save()
warp/math.py ADDED
@@ -0,0 +1,147 @@
1
+ # Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
2
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
3
+ # and proprietary rights in and to this software, related documentation
4
+ # and any modifications thereto. Any use, reproduction, disclosure or
5
+ # distribution of this software and related documentation without an express
6
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
7
+
8
+ from typing import Any
9
+
10
+ import warp as wp
11
+
12
+ """
13
+ Vector norm functions
14
+ """
15
+
16
+ __all__ = [
17
+ "norm_l1",
18
+ "norm_l2",
19
+ "norm_huber",
20
+ "norm_pseudo_huber",
21
+ "smooth_normalize",
22
+ ]
23
+
24
+
25
+ @wp.func
26
+ def norm_l1(v: Any):
27
+ """
28
+ Computes the L1 norm of a vector v.
29
+
30
+ .. math:: \\|v\\|_1 = \\sum_i |v_i|
31
+
32
+ Args:
33
+ v (Vector[Any,Float]): The vector to compute the L1 norm of.
34
+
35
+ Returns:
36
+ float: The L1 norm of the vector.
37
+ """
38
+ n = float(0.0)
39
+ for i in range(len(v)):
40
+ n += wp.abs(v[i])
41
+ return n
42
+
43
+
44
+ @wp.func
45
+ def norm_l2(v: Any):
46
+ """
47
+ Computes the L2 norm of a vector v.
48
+
49
+ .. math:: \\|v\\|_2 = \\sqrt{\\sum_i v_i^2}
50
+
51
+ Args:
52
+ v (Vector[Any,Float]): The vector to compute the L2 norm of.
53
+
54
+ Returns:
55
+ float: The L2 norm of the vector.
56
+ """
57
+ return wp.length(v)
58
+
59
+
60
+ @wp.func
61
+ def norm_huber(v: Any, delta: float = 1.0):
62
+ """
63
+ Computes the Huber norm of a vector v with a given delta.
64
+
65
+ .. math::
66
+ H(v) = \\begin{cases} \\frac{1}{2} \\|v\\|^2 & \\text{if } \\|v\\| \\leq \\delta \\\\ \\delta(\\|v\\| - \\frac{1}{2}\\delta) & \\text{otherwise} \\end{cases}
67
+
68
+ .. image:: /img/norm_huber.svg
69
+ :align: center
70
+
71
+ Args:
72
+ v (Vector[Any,Float]): The vector to compute the Huber norm of.
73
+ delta (float): The threshold value, defaults to 1.0.
74
+
75
+ Returns:
76
+ float: The Huber norm of the vector.
77
+ """
78
+ a = wp.dot(v, v)
79
+ if a <= delta * delta:
80
+ return 0.5 * a
81
+ return delta * (wp.sqrt(a) - 0.5 * delta)
82
+
83
+
84
+ @wp.func
85
+ def norm_pseudo_huber(v: Any, delta: float = 1.0):
86
+ """
87
+ Computes the "pseudo" Huber norm of a vector v with a given delta.
88
+
89
+ .. math::
90
+ H^\\prime(v) = \\delta \\sqrt{1 + \\frac{\\|v\\|^2}{\\delta^2}}
91
+
92
+ .. image:: /img/norm_pseudo_huber.svg
93
+ :align: center
94
+
95
+ Args:
96
+ v (Vector[Any,Float]): The vector to compute the Huber norm of.
97
+ delta (float): The threshold value, defaults to 1.0.
98
+
99
+ Returns:
100
+ float: The Huber norm of the vector.
101
+ """
102
+ a = wp.dot(v, v)
103
+ return delta * wp.sqrt(1.0 + a / (delta * delta))
104
+
105
+
106
+ @wp.func
107
+ def smooth_normalize(v: Any, delta: float = 1.0):
108
+ """
109
+ Normalizes a vector using the pseudo-Huber norm.
110
+
111
+ See :func:`norm_pseudo_huber`.
112
+
113
+ .. math::
114
+ \\frac{v}{H^\\prime(v)}
115
+
116
+ Args:
117
+ v (Vector[Any,Float]): The vector to normalize.
118
+ delta (float): The threshold value, defaults to 1.0.
119
+
120
+ Returns:
121
+ Vector[Any,Float]: The normalized vector.
122
+ """
123
+ return v / norm_pseudo_huber(v, delta)
124
+
125
+
126
+ # register API functions so they appear in the documentation
127
+
128
+ wp.context.register_api_function(
129
+ norm_l1,
130
+ group="Vector Math",
131
+ )
132
+ wp.context.register_api_function(
133
+ norm_l2,
134
+ group="Vector Math",
135
+ )
136
+ wp.context.register_api_function(
137
+ norm_huber,
138
+ group="Vector Math",
139
+ )
140
+ wp.context.register_api_function(
141
+ norm_pseudo_huber,
142
+ group="Vector Math",
143
+ )
144
+ wp.context.register_api_function(
145
+ smooth_normalize,
146
+ group="Vector Math",
147
+ )
warp/native/array.h CHANGED
@@ -1106,6 +1106,18 @@ inline CUDA_CALLABLE void adj_atomic_max(const A1<T>& buf, int i, int j, int k,
1106
1106
  FP_VERIFY_ADJ_4(value, adj_value)
1107
1107
  }
1108
1108
 
1109
+ template<template<typename> class A, typename T>
1110
+ CUDA_CALLABLE inline int len(const A<T>& a)
1111
+ {
1112
+ return a.shape[0];
1113
+ }
1114
+
1115
+ template<template<typename> class A, typename T>
1116
+ CUDA_CALLABLE inline void adj_len(const A<T>& a, A<T>& adj_a, int& adj_ret)
1117
+ {
1118
+ }
1119
+
1120
+
1109
1121
  } // namespace wp
1110
1122
 
1111
1123
  #include "fabric.h"
warp/native/builtin.h CHANGED
@@ -1761,6 +1761,5 @@ inline CUDA_CALLABLE void adj_expect_near(const vec3& actual, const vec3& expect
1761
1761
  // only include in kernels for now
1762
1762
  #if defined(__CUDACC_RTC__)
1763
1763
  #include "tile.h"
1764
- #include "tile_gemm.h"
1765
1764
  #include "tile_reduce.h"
1766
1765
  #endif