warp-lang 1.4.1__py3-none-macosx_10_13_universal2.whl → 1.4.2__py3-none-macosx_10_13_universal2.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

warp/native/array.h CHANGED
@@ -811,7 +811,7 @@ CUDA_CALLABLE inline void adj_atomic_add(bool* buf, bool value) { }
811
811
 
812
812
  // only generate gradients for T types
813
813
  template<typename T>
814
- inline CUDA_CALLABLE void adj_address(const array_t<T>& buf, int i, const array_t<T>& adj_buf, int& adj_i, const T& adj_output)
814
+ inline CUDA_CALLABLE void adj_address(const array_t<T>& buf, int i, const array_t<T>& adj_buf, int adj_i, const T& adj_output)
815
815
  {
816
816
  if (adj_buf.data)
817
817
  adj_atomic_add(&index(adj_buf, i), adj_output);
@@ -819,7 +819,7 @@ inline CUDA_CALLABLE void adj_address(const array_t<T>& buf, int i, const array_
819
819
  adj_atomic_add(&index_grad(buf, i), adj_output);
820
820
  }
821
821
  template<typename T>
822
- inline CUDA_CALLABLE void adj_address(const array_t<T>& buf, int i, int j, const array_t<T>& adj_buf, int& adj_i, int& adj_j, const T& adj_output)
822
+ inline CUDA_CALLABLE void adj_address(const array_t<T>& buf, int i, int j, const array_t<T>& adj_buf, int adj_i, int adj_j, const T& adj_output)
823
823
  {
824
824
  if (adj_buf.data)
825
825
  adj_atomic_add(&index(adj_buf, i, j), adj_output);
@@ -827,7 +827,7 @@ inline CUDA_CALLABLE void adj_address(const array_t<T>& buf, int i, int j, const
827
827
  adj_atomic_add(&index_grad(buf, i, j), adj_output);
828
828
  }
829
829
  template<typename T>
830
- inline CUDA_CALLABLE void adj_address(const array_t<T>& buf, int i, int j, int k, const array_t<T>& adj_buf, int& adj_i, int& adj_j, int& adj_k, const T& adj_output)
830
+ inline CUDA_CALLABLE void adj_address(const array_t<T>& buf, int i, int j, int k, const array_t<T>& adj_buf, int adj_i, int adj_j, int adj_k, const T& adj_output)
831
831
  {
832
832
  if (adj_buf.data)
833
833
  adj_atomic_add(&index(adj_buf, i, j, k), adj_output);
@@ -835,7 +835,7 @@ inline CUDA_CALLABLE void adj_address(const array_t<T>& buf, int i, int j, int k
835
835
  adj_atomic_add(&index_grad(buf, i, j, k), adj_output);
836
836
  }
837
837
  template<typename T>
838
- inline CUDA_CALLABLE void adj_address(const array_t<T>& buf, int i, int j, int k, int l, const array_t<T>& adj_buf, int& adj_i, int& adj_j, int& adj_k, int& adj_l, const T& adj_output)
838
+ inline CUDA_CALLABLE void adj_address(const array_t<T>& buf, int i, int j, int k, int l, const array_t<T>& adj_buf, int adj_i, int adj_j, int adj_k, int adj_l, const T& adj_output)
839
839
  {
840
840
  if (adj_buf.data)
841
841
  adj_atomic_add(&index(adj_buf, i, j, k, l), adj_output);
@@ -844,7 +844,7 @@ inline CUDA_CALLABLE void adj_address(const array_t<T>& buf, int i, int j, int k
844
844
  }
845
845
 
846
846
  template<typename T>
847
- inline CUDA_CALLABLE void adj_array_store(const array_t<T>& buf, int i, T value, const array_t<T>& adj_buf, int& adj_i, T& adj_value)
847
+ inline CUDA_CALLABLE void adj_array_store(const array_t<T>& buf, int i, T value, const array_t<T>& adj_buf, int adj_i, T& adj_value)
848
848
  {
849
849
  if (adj_buf.data)
850
850
  adj_value += index(adj_buf, i);
@@ -854,7 +854,7 @@ inline CUDA_CALLABLE void adj_array_store(const array_t<T>& buf, int i, T value,
854
854
  FP_VERIFY_ADJ_1(value, adj_value)
855
855
  }
856
856
  template<typename T>
857
- inline CUDA_CALLABLE void adj_array_store(const array_t<T>& buf, int i, int j, T value, const array_t<T>& adj_buf, int& adj_i, int& adj_j, T& adj_value)
857
+ inline CUDA_CALLABLE void adj_array_store(const array_t<T>& buf, int i, int j, T value, const array_t<T>& adj_buf, int adj_i, int adj_j, T& adj_value)
858
858
  {
859
859
  if (adj_buf.data)
860
860
  adj_value += index(adj_buf, i, j);
@@ -864,7 +864,7 @@ inline CUDA_CALLABLE void adj_array_store(const array_t<T>& buf, int i, int j, T
864
864
  FP_VERIFY_ADJ_2(value, adj_value)
865
865
  }
866
866
  template<typename T>
867
- inline CUDA_CALLABLE void adj_array_store(const array_t<T>& buf, int i, int j, int k, T value, const array_t<T>& adj_buf, int& adj_i, int& adj_j, int& adj_k, T& adj_value)
867
+ inline CUDA_CALLABLE void adj_array_store(const array_t<T>& buf, int i, int j, int k, T value, const array_t<T>& adj_buf, int adj_i, int adj_j, int adj_k, T& adj_value)
868
868
  {
869
869
  if (adj_buf.data)
870
870
  adj_value += index(adj_buf, i, j, k);
@@ -874,7 +874,7 @@ inline CUDA_CALLABLE void adj_array_store(const array_t<T>& buf, int i, int j, i
874
874
  FP_VERIFY_ADJ_3(value, adj_value)
875
875
  }
876
876
  template<typename T>
877
- inline CUDA_CALLABLE void adj_array_store(const array_t<T>& buf, int i, int j, int k, int l, T value, const array_t<T>& adj_buf, int& adj_i, int& adj_j, int& adj_k, int& adj_l, T& adj_value)
877
+ inline CUDA_CALLABLE void adj_array_store(const array_t<T>& buf, int i, int j, int k, int l, T value, const array_t<T>& adj_buf, int adj_i, int adj_j, int adj_k, int adj_l, T& adj_value)
878
878
  {
879
879
  if (adj_buf.data)
880
880
  adj_value += index(adj_buf, i, j, k, l);
@@ -898,7 +898,7 @@ inline CUDA_CALLABLE void adj_load(const T* address, const T& adj_address, T& ad
898
898
  }
899
899
 
900
900
  template<typename T>
901
- inline CUDA_CALLABLE void adj_atomic_add(const array_t<T>& buf, int i, T value, const array_t<T>& adj_buf, int& adj_i, T& adj_value, const T& adj_ret)
901
+ inline CUDA_CALLABLE void adj_atomic_add(const array_t<T>& buf, int i, T value, const array_t<T>& adj_buf, int adj_i, T& adj_value, const T& adj_ret)
902
902
  {
903
903
  if (adj_buf.data)
904
904
  adj_value += index(adj_buf, i);
@@ -908,7 +908,7 @@ inline CUDA_CALLABLE void adj_atomic_add(const array_t<T>& buf, int i, T value,
908
908
  FP_VERIFY_ADJ_1(value, adj_value)
909
909
  }
910
910
  template<typename T>
911
- inline CUDA_CALLABLE void adj_atomic_add(const array_t<T>& buf, int i, int j, T value, const array_t<T>& adj_buf, int& adj_i, int& adj_j, T& adj_value, const T& adj_ret)
911
+ inline CUDA_CALLABLE void adj_atomic_add(const array_t<T>& buf, int i, int j, T value, const array_t<T>& adj_buf, int adj_i, int adj_j, T& adj_value, const T& adj_ret)
912
912
  {
913
913
  if (adj_buf.data)
914
914
  adj_value += index(adj_buf, i, j);
@@ -918,7 +918,7 @@ inline CUDA_CALLABLE void adj_atomic_add(const array_t<T>& buf, int i, int j, T
918
918
  FP_VERIFY_ADJ_2(value, adj_value)
919
919
  }
920
920
  template<typename T>
921
- inline CUDA_CALLABLE void adj_atomic_add(const array_t<T>& buf, int i, int j, int k, T value, const array_t<T>& adj_buf, int& adj_i, int& adj_j, int& adj_k, T& adj_value, const T& adj_ret)
921
+ inline CUDA_CALLABLE void adj_atomic_add(const array_t<T>& buf, int i, int j, int k, T value, const array_t<T>& adj_buf, int adj_i, int adj_j, int adj_k, T& adj_value, const T& adj_ret)
922
922
  {
923
923
  if (adj_buf.data)
924
924
  adj_value += index(adj_buf, i, j, k);
@@ -928,7 +928,7 @@ inline CUDA_CALLABLE void adj_atomic_add(const array_t<T>& buf, int i, int j, in
928
928
  FP_VERIFY_ADJ_3(value, adj_value)
929
929
  }
930
930
  template<typename T>
931
- inline CUDA_CALLABLE void adj_atomic_add(const array_t<T>& buf, int i, int j, int k, int l, T value, const array_t<T>& adj_buf, int& adj_i, int& adj_j, int& adj_k, int& adj_l, T& adj_value, const T& adj_ret)
931
+ inline CUDA_CALLABLE void adj_atomic_add(const array_t<T>& buf, int i, int j, int k, int l, T value, const array_t<T>& adj_buf, int adj_i, int adj_j, int adj_k, int adj_l, T& adj_value, const T& adj_ret)
932
932
  {
933
933
  if (adj_buf.data)
934
934
  adj_value += index(adj_buf, i, j, k, l);
@@ -939,7 +939,7 @@ inline CUDA_CALLABLE void adj_atomic_add(const array_t<T>& buf, int i, int j, in
939
939
  }
940
940
 
941
941
  template<typename T>
942
- inline CUDA_CALLABLE void adj_atomic_sub(const array_t<T>& buf, int i, T value, const array_t<T>& adj_buf, int& adj_i, T& adj_value, const T& adj_ret)
942
+ inline CUDA_CALLABLE void adj_atomic_sub(const array_t<T>& buf, int i, T value, const array_t<T>& adj_buf, int adj_i, T& adj_value, const T& adj_ret)
943
943
  {
944
944
  if (adj_buf.data)
945
945
  adj_value -= index(adj_buf, i);
@@ -949,7 +949,7 @@ inline CUDA_CALLABLE void adj_atomic_sub(const array_t<T>& buf, int i, T value,
949
949
  FP_VERIFY_ADJ_1(value, adj_value)
950
950
  }
951
951
  template<typename T>
952
- inline CUDA_CALLABLE void adj_atomic_sub(const array_t<T>& buf, int i, int j, T value, const array_t<T>& adj_buf, int& adj_i, int& adj_j, T& adj_value, const T& adj_ret)
952
+ inline CUDA_CALLABLE void adj_atomic_sub(const array_t<T>& buf, int i, int j, T value, const array_t<T>& adj_buf, int adj_i, int adj_j, T& adj_value, const T& adj_ret)
953
953
  {
954
954
  if (adj_buf.data)
955
955
  adj_value -= index(adj_buf, i, j);
@@ -959,7 +959,7 @@ inline CUDA_CALLABLE void adj_atomic_sub(const array_t<T>& buf, int i, int j, T
959
959
  FP_VERIFY_ADJ_2(value, adj_value)
960
960
  }
961
961
  template<typename T>
962
- inline CUDA_CALLABLE void adj_atomic_sub(const array_t<T>& buf, int i, int j, int k, T value, const array_t<T>& adj_buf, int& adj_i, int& adj_j, int& adj_k, T& adj_value, const T& adj_ret)
962
+ inline CUDA_CALLABLE void adj_atomic_sub(const array_t<T>& buf, int i, int j, int k, T value, const array_t<T>& adj_buf, int adj_i, int adj_j, int adj_k, T& adj_value, const T& adj_ret)
963
963
  {
964
964
  if (adj_buf.data)
965
965
  adj_value -= index(adj_buf, i, j, k);
@@ -969,7 +969,7 @@ inline CUDA_CALLABLE void adj_atomic_sub(const array_t<T>& buf, int i, int j, in
969
969
  FP_VERIFY_ADJ_3(value, adj_value)
970
970
  }
971
971
  template<typename T>
972
- inline CUDA_CALLABLE void adj_atomic_sub(const array_t<T>& buf, int i, int j, int k, int l, T value, const array_t<T>& adj_buf, int& adj_i, int& adj_j, int& adj_k, int& adj_l, T& adj_value, const T& adj_ret)
972
+ inline CUDA_CALLABLE void adj_atomic_sub(const array_t<T>& buf, int i, int j, int k, int l, T value, const array_t<T>& adj_buf, int adj_i, int adj_j, int adj_k, int adj_l, T& adj_value, const T& adj_ret)
973
973
  {
974
974
  if (adj_buf.data)
975
975
  adj_value -= index(adj_buf, i, j, k, l);
@@ -981,44 +981,44 @@ inline CUDA_CALLABLE void adj_atomic_sub(const array_t<T>& buf, int i, int j, in
981
981
 
982
982
  // generic array types that do not support gradient computation (indexedarray, etc.)
983
983
  template<template<typename> class A1, template<typename> class A2, typename T>
984
- inline CUDA_CALLABLE void adj_address(const A1<T>& buf, int i, const A2<T>& adj_buf, int& adj_i, const T& adj_output) {}
984
+ inline CUDA_CALLABLE void adj_address(const A1<T>& buf, int i, const A2<T>& adj_buf, int adj_i, const T& adj_output) {}
985
985
  template<template<typename> class A1, template<typename> class A2, typename T>
986
- inline CUDA_CALLABLE void adj_address(const A1<T>& buf, int i, int j, const A2<T>& adj_buf, int& adj_i, int& adj_j, const T& adj_output) {}
986
+ inline CUDA_CALLABLE void adj_address(const A1<T>& buf, int i, int j, const A2<T>& adj_buf, int adj_i, int adj_j, const T& adj_output) {}
987
987
  template<template<typename> class A1, template<typename> class A2, typename T>
988
- inline CUDA_CALLABLE void adj_address(const A1<T>& buf, int i, int j, int k, const A2<T>& adj_buf, int& adj_i, int& adj_j, int& adj_k, const T& adj_output) {}
988
+ inline CUDA_CALLABLE void adj_address(const A1<T>& buf, int i, int j, int k, const A2<T>& adj_buf, int adj_i, int adj_j, int adj_k, const T& adj_output) {}
989
989
  template<template<typename> class A1, template<typename> class A2, typename T>
990
- inline CUDA_CALLABLE void adj_address(const A1<T>& buf, int i, int j, int k, int l, const A2<T>& adj_buf, int& adj_i, int& adj_j, int& adj_k, int& adj_l, const T& adj_output) {}
990
+ inline CUDA_CALLABLE void adj_address(const A1<T>& buf, int i, int j, int k, int l, const A2<T>& adj_buf, int adj_i, int adj_j, int adj_k, int adj_l, const T& adj_output) {}
991
991
 
992
992
  template<template<typename> class A1, template<typename> class A2, typename T>
993
- inline CUDA_CALLABLE void adj_array_store(const A1<T>& buf, int i, T value, const A2<T>& adj_buf, int& adj_i, T& adj_value) {}
993
+ inline CUDA_CALLABLE void adj_array_store(const A1<T>& buf, int i, T value, const A2<T>& adj_buf, int adj_i, T& adj_value) {}
994
994
  template<template<typename> class A1, template<typename> class A2, typename T>
995
- inline CUDA_CALLABLE void adj_array_store(const A1<T>& buf, int i, int j, T value, const A2<T>& adj_buf, int& adj_i, int& adj_j, T& adj_value) {}
995
+ inline CUDA_CALLABLE void adj_array_store(const A1<T>& buf, int i, int j, T value, const A2<T>& adj_buf, int adj_i, int adj_j, T& adj_value) {}
996
996
  template<template<typename> class A1, template<typename> class A2, typename T>
997
- inline CUDA_CALLABLE void adj_array_store(const A1<T>& buf, int i, int j, int k, T value, const A2<T>& adj_buf, int& adj_i, int& adj_j, int& adj_k, T& adj_value) {}
997
+ inline CUDA_CALLABLE void adj_array_store(const A1<T>& buf, int i, int j, int k, T value, const A2<T>& adj_buf, int adj_i, int adj_j, int adj_k, T& adj_value) {}
998
998
  template<template<typename> class A1, template<typename> class A2, typename T>
999
- inline CUDA_CALLABLE void adj_array_store(const A1<T>& buf, int i, int j, int k, int l, T value, const A2<T>& adj_buf, int& adj_i, int& adj_j, int& adj_k, int& adj_l, T& adj_value) {}
999
+ inline CUDA_CALLABLE void adj_array_store(const A1<T>& buf, int i, int j, int k, int l, T value, const A2<T>& adj_buf, int adj_i, int adj_j, int adj_k, int adj_l, T& adj_value) {}
1000
1000
 
1001
1001
  template<template<typename> class A1, template<typename> class A2, typename T>
1002
- inline CUDA_CALLABLE void adj_atomic_add(const A1<T>& buf, int i, T value, const A2<T>& adj_buf, int& adj_i, T& adj_value, const T& adj_ret) {}
1002
+ inline CUDA_CALLABLE void adj_atomic_add(const A1<T>& buf, int i, T value, const A2<T>& adj_buf, int adj_i, T& adj_value, const T& adj_ret) {}
1003
1003
  template<template<typename> class A1, template<typename> class A2, typename T>
1004
- inline CUDA_CALLABLE void adj_atomic_add(const A1<T>& buf, int i, int j, T value, const A2<T>& adj_buf, int& adj_i, int& adj_j, T& adj_value, const T& adj_ret) {}
1004
+ inline CUDA_CALLABLE void adj_atomic_add(const A1<T>& buf, int i, int j, T value, const A2<T>& adj_buf, int adj_i, int adj_j, T& adj_value, const T& adj_ret) {}
1005
1005
  template<template<typename> class A1, template<typename> class A2, typename T>
1006
- inline CUDA_CALLABLE void adj_atomic_add(const A1<T>& buf, int i, int j, int k, T value, const A2<T>& adj_buf, int& adj_i, int& adj_j, int& adj_k, T& adj_value, const T& adj_ret) {}
1006
+ inline CUDA_CALLABLE void adj_atomic_add(const A1<T>& buf, int i, int j, int k, T value, const A2<T>& adj_buf, int adj_i, int adj_j, int adj_k, T& adj_value, const T& adj_ret) {}
1007
1007
  template<template<typename> class A1, template<typename> class A2, typename T>
1008
- inline CUDA_CALLABLE void adj_atomic_add(const A1<T>& buf, int i, int j, int k, int l, T value, const A2<T>& adj_buf, int& adj_i, int& adj_j, int& adj_k, int& adj_l, T& adj_value, const T& adj_ret) {}
1008
+ inline CUDA_CALLABLE void adj_atomic_add(const A1<T>& buf, int i, int j, int k, int l, T value, const A2<T>& adj_buf, int adj_i, int adj_j, int adj_k, int adj_l, T& adj_value, const T& adj_ret) {}
1009
1009
 
1010
1010
  template<template<typename> class A1, template<typename> class A2, typename T>
1011
- inline CUDA_CALLABLE void adj_atomic_sub(const A1<T>& buf, int i, T value, const A2<T>& adj_buf, int& adj_i, T& adj_value, const T& adj_ret) {}
1011
+ inline CUDA_CALLABLE void adj_atomic_sub(const A1<T>& buf, int i, T value, const A2<T>& adj_buf, int adj_i, T& adj_value, const T& adj_ret) {}
1012
1012
  template<template<typename> class A1, template<typename> class A2, typename T>
1013
- inline CUDA_CALLABLE void adj_atomic_sub(const A1<T>& buf, int i, int j, T value, const A2<T>& adj_buf, int& adj_i, int& adj_j, T& adj_value, const T& adj_ret) {}
1013
+ inline CUDA_CALLABLE void adj_atomic_sub(const A1<T>& buf, int i, int j, T value, const A2<T>& adj_buf, int adj_i, int adj_j, T& adj_value, const T& adj_ret) {}
1014
1014
  template<template<typename> class A1, template<typename> class A2, typename T>
1015
- inline CUDA_CALLABLE void adj_atomic_sub(const A1<T>& buf, int i, int j, int k, T value, const A2<T>& adj_buf, int& adj_i, int& adj_j, int& adj_k, T& adj_value, const T& adj_ret) {}
1015
+ inline CUDA_CALLABLE void adj_atomic_sub(const A1<T>& buf, int i, int j, int k, T value, const A2<T>& adj_buf, int adj_i, int adj_j, int adj_k, T& adj_value, const T& adj_ret) {}
1016
1016
  template<template<typename> class A1, template<typename> class A2, typename T>
1017
- inline CUDA_CALLABLE void adj_atomic_sub(const A1<T>& buf, int i, int j, int k, int l, T value, const A2<T>& adj_buf, int& adj_i, int& adj_j, int& adj_k, int& adj_l, T& adj_value, const T& adj_ret) {}
1017
+ inline CUDA_CALLABLE void adj_atomic_sub(const A1<T>& buf, int i, int j, int k, int l, T value, const A2<T>& adj_buf, int adj_i, int adj_j, int adj_k, int adj_l, T& adj_value, const T& adj_ret) {}
1018
1018
 
1019
1019
  // generic handler for scalar values
1020
1020
  template<template<typename> class A1, template<typename> class A2, typename T>
1021
- inline CUDA_CALLABLE void adj_atomic_min(const A1<T>& buf, int i, T value, const A2<T>& adj_buf, int& adj_i, T& adj_value, const T& adj_ret) {
1021
+ inline CUDA_CALLABLE void adj_atomic_min(const A1<T>& buf, int i, T value, const A2<T>& adj_buf, int adj_i, T& adj_value, const T& adj_ret) {
1022
1022
  if (adj_buf.data)
1023
1023
  adj_atomic_minmax(&index(buf, i), &index(adj_buf, i), value, adj_value);
1024
1024
  else if (buf.grad)
@@ -1027,7 +1027,7 @@ inline CUDA_CALLABLE void adj_atomic_min(const A1<T>& buf, int i, T value, const
1027
1027
  FP_VERIFY_ADJ_1(value, adj_value)
1028
1028
  }
1029
1029
  template<template<typename> class A1, template<typename> class A2, typename T>
1030
- inline CUDA_CALLABLE void adj_atomic_min(const A1<T>& buf, int i, int j, T value, const A2<T>& adj_buf, int& adj_i, int& adj_j, T& adj_value, const T& adj_ret) {
1030
+ inline CUDA_CALLABLE void adj_atomic_min(const A1<T>& buf, int i, int j, T value, const A2<T>& adj_buf, int adj_i, int adj_j, T& adj_value, const T& adj_ret) {
1031
1031
  if (adj_buf.data)
1032
1032
  adj_atomic_minmax(&index(buf, i, j), &index(adj_buf, i, j), value, adj_value);
1033
1033
  else if (buf.grad)
@@ -1036,7 +1036,7 @@ inline CUDA_CALLABLE void adj_atomic_min(const A1<T>& buf, int i, int j, T value
1036
1036
  FP_VERIFY_ADJ_2(value, adj_value)
1037
1037
  }
1038
1038
  template<template<typename> class A1, template<typename> class A2, typename T>
1039
- inline CUDA_CALLABLE void adj_atomic_min(const A1<T>& buf, int i, int j, int k, T value, const A2<T>& adj_buf, int& adj_i, int& adj_j, int& adj_k, T& adj_value, const T& adj_ret) {
1039
+ inline CUDA_CALLABLE void adj_atomic_min(const A1<T>& buf, int i, int j, int k, T value, const A2<T>& adj_buf, int adj_i, int adj_j, int adj_k, T& adj_value, const T& adj_ret) {
1040
1040
  if (adj_buf.data)
1041
1041
  adj_atomic_minmax(&index(buf, i, j, k), &index(adj_buf, i, j, k), value, adj_value);
1042
1042
  else if (buf.grad)
@@ -1045,7 +1045,7 @@ inline CUDA_CALLABLE void adj_atomic_min(const A1<T>& buf, int i, int j, int k,
1045
1045
  FP_VERIFY_ADJ_3(value, adj_value)
1046
1046
  }
1047
1047
  template<template<typename> class A1, template<typename> class A2, typename T>
1048
- inline CUDA_CALLABLE void adj_atomic_min(const A1<T>& buf, int i, int j, int k, int l, T value, const A2<T>& adj_buf, int& adj_i, int& adj_j, int& adj_k, int& adj_l, T& adj_value, const T& adj_ret) {
1048
+ inline CUDA_CALLABLE void adj_atomic_min(const A1<T>& buf, int i, int j, int k, int l, T value, const A2<T>& adj_buf, int adj_i, int adj_j, int adj_k, int adj_l, T& adj_value, const T& adj_ret) {
1049
1049
  if (adj_buf.data)
1050
1050
  adj_atomic_minmax(&index(buf, i, j, k, l), &index(adj_buf, i, j, k, l), value, adj_value);
1051
1051
  else if (buf.grad)
@@ -1055,7 +1055,7 @@ inline CUDA_CALLABLE void adj_atomic_min(const A1<T>& buf, int i, int j, int k,
1055
1055
  }
1056
1056
 
1057
1057
  template<template<typename> class A1, template<typename> class A2, typename T>
1058
- inline CUDA_CALLABLE void adj_atomic_max(const A1<T>& buf, int i, T value, const A2<T>& adj_buf, int& adj_i, T& adj_value, const T& adj_ret) {
1058
+ inline CUDA_CALLABLE void adj_atomic_max(const A1<T>& buf, int i, T value, const A2<T>& adj_buf, int adj_i, T& adj_value, const T& adj_ret) {
1059
1059
  if (adj_buf.data)
1060
1060
  adj_atomic_minmax(&index(buf, i), &index(adj_buf, i), value, adj_value);
1061
1061
  else if (buf.grad)
@@ -1064,7 +1064,7 @@ inline CUDA_CALLABLE void adj_atomic_max(const A1<T>& buf, int i, T value, const
1064
1064
  FP_VERIFY_ADJ_1(value, adj_value)
1065
1065
  }
1066
1066
  template<template<typename> class A1, template<typename> class A2, typename T>
1067
- inline CUDA_CALLABLE void adj_atomic_max(const A1<T>& buf, int i, int j, T value, const A2<T>& adj_buf, int& adj_i, int& adj_j, T& adj_value, const T& adj_ret) {
1067
+ inline CUDA_CALLABLE void adj_atomic_max(const A1<T>& buf, int i, int j, T value, const A2<T>& adj_buf, int adj_i, int adj_j, T& adj_value, const T& adj_ret) {
1068
1068
  if (adj_buf.data)
1069
1069
  adj_atomic_minmax(&index(buf, i, j), &index(adj_buf, i, j), value, adj_value);
1070
1070
  else if (buf.grad)
@@ -1073,7 +1073,7 @@ inline CUDA_CALLABLE void adj_atomic_max(const A1<T>& buf, int i, int j, T value
1073
1073
  FP_VERIFY_ADJ_2(value, adj_value)
1074
1074
  }
1075
1075
  template<template<typename> class A1, template<typename> class A2, typename T>
1076
- inline CUDA_CALLABLE void adj_atomic_max(const A1<T>& buf, int i, int j, int k, T value, const A2<T>& adj_buf, int& adj_i, int& adj_j, int& adj_k, T& adj_value, const T& adj_ret) {
1076
+ inline CUDA_CALLABLE void adj_atomic_max(const A1<T>& buf, int i, int j, int k, T value, const A2<T>& adj_buf, int adj_i, int adj_j, int adj_k, T& adj_value, const T& adj_ret) {
1077
1077
  if (adj_buf.data)
1078
1078
  adj_atomic_minmax(&index(buf, i, j, k), &index(adj_buf, i, j, k), value, adj_value);
1079
1079
  else if (buf.grad)
@@ -1082,7 +1082,7 @@ inline CUDA_CALLABLE void adj_atomic_max(const A1<T>& buf, int i, int j, int k,
1082
1082
  FP_VERIFY_ADJ_3(value, adj_value)
1083
1083
  }
1084
1084
  template<template<typename> class A1, template<typename> class A2, typename T>
1085
- inline CUDA_CALLABLE void adj_atomic_max(const A1<T>& buf, int i, int j, int k, int l, T value, const A2<T>& adj_buf, int& adj_i, int& adj_j, int& adj_k, int& adj_l, T& adj_value, const T& adj_ret) {
1085
+ inline CUDA_CALLABLE void adj_atomic_max(const A1<T>& buf, int i, int j, int k, int l, T value, const A2<T>& adj_buf, int adj_i, int adj_j, int adj_k, int adj_l, T& adj_value, const T& adj_ret) {
1086
1086
  if (adj_buf.data)
1087
1087
  adj_atomic_minmax(&index(buf, i, j, k, l), &index(adj_buf, i, j, k, l), value, adj_value);
1088
1088
  else if (buf.grad)
warp/native/builtin.h CHANGED
@@ -1575,32 +1575,73 @@ inline CUDA_CALLABLE void print(transform_t<Type> t)
1575
1575
  printf("(%g %g %g) (%g %g %g %g)\n", float(t.p[0]), float(t.p[1]), float(t.p[2]), float(t.q.x), float(t.q.y), float(t.q.z), float(t.q.w));
1576
1576
  }
1577
1577
 
1578
- inline CUDA_CALLABLE void adj_print(int i, int adj_i) { printf("%d adj: %d\n", i, adj_i); }
1579
- inline CUDA_CALLABLE void adj_print(float f, float adj_f) { printf("%g adj: %g\n", f, adj_f); }
1580
- inline CUDA_CALLABLE void adj_print(short f, short adj_f) { printf("%hd adj: %hd\n", f, adj_f); }
1581
- inline CUDA_CALLABLE void adj_print(long f, long adj_f) { printf("%ld adj: %ld\n", f, adj_f); }
1582
- inline CUDA_CALLABLE void adj_print(long long f, long long adj_f) { printf("%lld adj: %lld\n", f, adj_f); }
1583
- inline CUDA_CALLABLE void adj_print(unsigned f, unsigned adj_f) { printf("%u adj: %u\n", f, adj_f); }
1584
- inline CUDA_CALLABLE void adj_print(unsigned short f, unsigned short adj_f) { printf("%hu adj: %hu\n", f, adj_f); }
1585
- inline CUDA_CALLABLE void adj_print(unsigned long f, unsigned long adj_f) { printf("%lu adj: %lu\n", f, adj_f); }
1586
- inline CUDA_CALLABLE void adj_print(unsigned long long f, unsigned long long adj_f) { printf("%llu adj: %llu\n", f, adj_f); }
1587
- inline CUDA_CALLABLE void adj_print(half h, half adj_h) { printf("%g adj: %g\n", half_to_float(h), half_to_float(adj_h)); }
1588
- inline CUDA_CALLABLE void adj_print(double f, double adj_f) { printf("%g adj: %g\n", f, adj_f); }
1578
+ template<typename T>
1579
+ inline CUDA_CALLABLE void adj_print(const T& x, const T& adj_x)
1580
+ {
1581
+ printf("adj: <type without print implementation>\n");
1582
+ }
1583
+
1584
+ // note: adj_print() only prints the adjoint value, since the value itself gets printed in replay print()
1585
+ inline CUDA_CALLABLE void adj_print(half x, half adj_x) { printf("adj: %g\n", half_to_float(adj_x)); }
1586
+ inline CUDA_CALLABLE void adj_print(float x, float adj_x) { printf("adj: %g\n", adj_x); }
1587
+ inline CUDA_CALLABLE void adj_print(double x, double adj_x) { printf("adj: %g\n", adj_x); }
1588
+
1589
+ inline CUDA_CALLABLE void adj_print(signed char x, signed char adj_x) { printf("adj: %d\n", adj_x); }
1590
+ inline CUDA_CALLABLE void adj_print(short x, short adj_x) { printf("adj: %d\n", adj_x); }
1591
+ inline CUDA_CALLABLE void adj_print(int x, int adj_x) { printf("adj: %d\n", adj_x); }
1592
+ inline CUDA_CALLABLE void adj_print(long x, long adj_x) { printf("adj: %ld\n", adj_x); }
1593
+ inline CUDA_CALLABLE void adj_print(long long x, long long adj_x) { printf("adj: %lld\n", adj_x); }
1594
+
1595
+ inline CUDA_CALLABLE void adj_print(unsigned char x, unsigned char adj_x) { printf("adj: %u\n", adj_x); }
1596
+ inline CUDA_CALLABLE void adj_print(unsigned short x, unsigned short adj_x) { printf("adj: %u\n", adj_x); }
1597
+ inline CUDA_CALLABLE void adj_print(unsigned x, unsigned adj_x) { printf("adj: %u\n", adj_x); }
1598
+ inline CUDA_CALLABLE void adj_print(unsigned long x, unsigned long adj_x) { printf("adj: %lu\n", adj_x); }
1599
+ inline CUDA_CALLABLE void adj_print(unsigned long long x, unsigned long long adj_x) { printf("adj: %llu\n", adj_x); }
1600
+
1601
+ inline CUDA_CALLABLE void adj_print(bool x, bool adj_x) { printf("adj: %s\n", (adj_x ? "True" : "False")); }
1589
1602
 
1590
1603
  template<unsigned Length, typename Type>
1591
- inline CUDA_CALLABLE void adj_print(vec_t<Length, Type> v, vec_t<Length, Type>& adj_v) { printf("%g %g adj: %g %g \n", v[0], v[1], adj_v[0], adj_v[1]); }
1604
+ inline CUDA_CALLABLE void adj_print(const vec_t<Length, Type>& v, const vec_t<Length, Type>& adj_v)
1605
+ {
1606
+ printf("adj:");
1607
+ for (unsigned i = 0; i < Length; i++)
1608
+ printf(" %g", float(adj_v[i]));
1609
+ printf("\n");
1610
+ }
1592
1611
 
1593
1612
  template<unsigned Rows, unsigned Cols, typename Type>
1594
- inline CUDA_CALLABLE void adj_print(mat_t<Rows, Cols, Type> m, mat_t<Rows, Cols, Type>& adj_m) { }
1613
+ inline CUDA_CALLABLE void adj_print(const mat_t<Rows, Cols, Type>& m, const mat_t<Rows, Cols, Type>& adj_m)
1614
+ {
1615
+ for (unsigned i = 0; i < Rows; i++)
1616
+ {
1617
+ if (i == 0)
1618
+ printf("adj:");
1619
+ else
1620
+ printf(" ");
1621
+ for (unsigned j = 0; j < Cols; j++)
1622
+ printf(" %g", float(adj_m.data[i][j]));
1623
+ printf("\n");
1624
+ }
1625
+ }
1595
1626
 
1596
1627
  template<typename Type>
1597
- inline CUDA_CALLABLE void adj_print(quat_t<Type> q, quat_t<Type>& adj_q) { printf("%g %g %g %g adj: %g %g %g %g\n", q.x, q.y, q.z, q.w, adj_q.x, adj_q.y, adj_q.z, adj_q.w); }
1628
+ inline CUDA_CALLABLE void adj_print(const quat_t<Type>& q, const quat_t<Type>& adj_q)
1629
+ {
1630
+ printf("adj: %g %g %g %g\n", float(adj_q.x), float(adj_q.y), float(adj_q.z), float(adj_q.w));
1631
+ }
1598
1632
 
1599
1633
  template<typename Type>
1600
- inline CUDA_CALLABLE void adj_print(transform_t<Type> t, transform_t<Type>& adj_t) {}
1601
-
1602
- inline CUDA_CALLABLE void adj_print(str t, str& adj_t) {}
1634
+ inline CUDA_CALLABLE void adj_print(const transform_t<Type>& t, const transform_t<Type>& adj_t)
1635
+ {
1636
+ printf("adj: (%g %g %g) (%g %g %g %g)\n",
1637
+ float(adj_t.p[0]), float(adj_t.p[1]), float(adj_t.p[2]),
1638
+ float(adj_t.q.x), float(adj_t.q.y), float(adj_t.q.z), float(adj_t.q.w));
1639
+ }
1603
1640
 
1641
+ inline CUDA_CALLABLE void adj_print(str t, str& adj_t)
1642
+ {
1643
+ printf("adj: %s\n", t);
1644
+ }
1604
1645
 
1605
1646
  template <typename T>
1606
1647
  inline CUDA_CALLABLE void expect_eq(const T& actual, const T& expected)
warp/sim/model.py CHANGED
@@ -4060,7 +4060,7 @@ class ModelBuilder:
4060
4060
  radius_mean: float = default_particle_radius,
4061
4061
  radius_std: float = 0.0,
4062
4062
  ):
4063
- rng = np.random.default_rng()
4063
+ rng = np.random.default_rng(42)
4064
4064
  for z in range(dim_z):
4065
4065
  for y in range(dim_y):
4066
4066
  for x in range(dim_x):
@@ -4070,7 +4070,7 @@ class ModelBuilder:
4070
4070
  p = wp.quat_rotate(rot, v) + pos + wp.vec3(rng.random(3) * jitter)
4071
4071
 
4072
4072
  if radius_std > 0.0:
4073
- r = radius_mean + np.random.randn() * radius_std
4073
+ r = radius_mean + rng.standard_normal() * radius_std
4074
4074
  else:
4075
4075
  r = radius_mean
4076
4076
  self.add_particle(p, vel, m, r)