xtgeo 4.10.0__cp310-cp310-macosx_11_0_arm64.whl → 4.10.1__cp310-cp310-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of xtgeo might be problematic.

Files changed (561)
  1. xtgeo/_internal.cpython-310-darwin.so +0 -0
  2. xtgeo/common/version.py +2 -2
  3. xtgeo/include/fmt/args.h +220 -0
  4. xtgeo/include/fmt/base.h +2989 -0
  5. xtgeo/include/fmt/chrono.h +2330 -0
  6. xtgeo/include/fmt/color.h +637 -0
  7. xtgeo/include/fmt/compile.h +539 -0
  8. xtgeo/include/fmt/core.h +5 -0
  9. xtgeo/include/fmt/format-inl.h +1948 -0
  10. xtgeo/include/fmt/format.h +4244 -0
  11. xtgeo/include/fmt/os.h +427 -0
  12. xtgeo/include/fmt/ostream.h +167 -0
  13. xtgeo/include/fmt/printf.h +633 -0
  14. xtgeo/include/fmt/ranges.h +850 -0
  15. xtgeo/include/fmt/std.h +728 -0
  16. xtgeo/include/fmt/xchar.h +369 -0
  17. xtgeo/lib/cmake/fmt/fmt-config-version.cmake +43 -0
  18. xtgeo/lib/cmake/fmt/fmt-config.cmake +31 -0
  19. xtgeo/lib/cmake/fmt/fmt-targets-release.cmake +19 -0
  20. xtgeo/{share/eigen3/cmake/Eigen3Targets.cmake → lib/cmake/fmt/fmt-targets.cmake} +16 -6
  21. xtgeo/lib/libfmt.a +0 -0
  22. xtgeo/lib/pkgconfig/fmt.pc +11 -0
  23. {xtgeo-4.10.0.dist-info → xtgeo-4.10.1.dist-info}/METADATA +1 -1
  24. xtgeo-4.10.1.dist-info/RECORD +137 -0
  25. xtgeo/include/eigen3/Eigen/Cholesky +0 -45
  26. xtgeo/include/eigen3/Eigen/CholmodSupport +0 -48
  27. xtgeo/include/eigen3/Eigen/Core +0 -384
  28. xtgeo/include/eigen3/Eigen/Dense +0 -7
  29. xtgeo/include/eigen3/Eigen/Eigen +0 -2
  30. xtgeo/include/eigen3/Eigen/Eigenvalues +0 -60
  31. xtgeo/include/eigen3/Eigen/Geometry +0 -59
  32. xtgeo/include/eigen3/Eigen/Householder +0 -29
  33. xtgeo/include/eigen3/Eigen/IterativeLinearSolvers +0 -48
  34. xtgeo/include/eigen3/Eigen/Jacobi +0 -32
  35. xtgeo/include/eigen3/Eigen/KLUSupport +0 -41
  36. xtgeo/include/eigen3/Eigen/LU +0 -47
  37. xtgeo/include/eigen3/Eigen/MetisSupport +0 -35
  38. xtgeo/include/eigen3/Eigen/OrderingMethods +0 -70
  39. xtgeo/include/eigen3/Eigen/PaStiXSupport +0 -49
  40. xtgeo/include/eigen3/Eigen/PardisoSupport +0 -35
  41. xtgeo/include/eigen3/Eigen/QR +0 -50
  42. xtgeo/include/eigen3/Eigen/QtAlignedMalloc +0 -39
  43. xtgeo/include/eigen3/Eigen/SPQRSupport +0 -34
  44. xtgeo/include/eigen3/Eigen/SVD +0 -50
  45. xtgeo/include/eigen3/Eigen/Sparse +0 -34
  46. xtgeo/include/eigen3/Eigen/SparseCholesky +0 -37
  47. xtgeo/include/eigen3/Eigen/SparseCore +0 -69
  48. xtgeo/include/eigen3/Eigen/SparseLU +0 -50
  49. xtgeo/include/eigen3/Eigen/SparseQR +0 -36
  50. xtgeo/include/eigen3/Eigen/StdDeque +0 -27
  51. xtgeo/include/eigen3/Eigen/StdList +0 -26
  52. xtgeo/include/eigen3/Eigen/StdVector +0 -27
  53. xtgeo/include/eigen3/Eigen/SuperLUSupport +0 -64
  54. xtgeo/include/eigen3/Eigen/UmfPackSupport +0 -40
  55. xtgeo/include/eigen3/Eigen/src/Cholesky/LDLT.h +0 -688
  56. xtgeo/include/eigen3/Eigen/src/Cholesky/LLT.h +0 -558
  57. xtgeo/include/eigen3/Eigen/src/Cholesky/LLT_LAPACKE.h +0 -99
  58. xtgeo/include/eigen3/Eigen/src/CholmodSupport/CholmodSupport.h +0 -682
  59. xtgeo/include/eigen3/Eigen/src/Core/ArithmeticSequence.h +0 -413
  60. xtgeo/include/eigen3/Eigen/src/Core/Array.h +0 -417
  61. xtgeo/include/eigen3/Eigen/src/Core/ArrayBase.h +0 -226
  62. xtgeo/include/eigen3/Eigen/src/Core/ArrayWrapper.h +0 -209
  63. xtgeo/include/eigen3/Eigen/src/Core/Assign.h +0 -90
  64. xtgeo/include/eigen3/Eigen/src/Core/AssignEvaluator.h +0 -1010
  65. xtgeo/include/eigen3/Eigen/src/Core/Assign_MKL.h +0 -178
  66. xtgeo/include/eigen3/Eigen/src/Core/BandMatrix.h +0 -353
  67. xtgeo/include/eigen3/Eigen/src/Core/Block.h +0 -448
  68. xtgeo/include/eigen3/Eigen/src/Core/BooleanRedux.h +0 -162
  69. xtgeo/include/eigen3/Eigen/src/Core/CommaInitializer.h +0 -164
  70. xtgeo/include/eigen3/Eigen/src/Core/ConditionEstimator.h +0 -175
  71. xtgeo/include/eigen3/Eigen/src/Core/CoreEvaluators.h +0 -1741
  72. xtgeo/include/eigen3/Eigen/src/Core/CoreIterators.h +0 -132
  73. xtgeo/include/eigen3/Eigen/src/Core/CwiseBinaryOp.h +0 -183
  74. xtgeo/include/eigen3/Eigen/src/Core/CwiseNullaryOp.h +0 -1001
  75. xtgeo/include/eigen3/Eigen/src/Core/CwiseTernaryOp.h +0 -197
  76. xtgeo/include/eigen3/Eigen/src/Core/CwiseUnaryOp.h +0 -103
  77. xtgeo/include/eigen3/Eigen/src/Core/CwiseUnaryView.h +0 -132
  78. xtgeo/include/eigen3/Eigen/src/Core/DenseBase.h +0 -701
  79. xtgeo/include/eigen3/Eigen/src/Core/DenseCoeffsBase.h +0 -685
  80. xtgeo/include/eigen3/Eigen/src/Core/DenseStorage.h +0 -652
  81. xtgeo/include/eigen3/Eigen/src/Core/Diagonal.h +0 -258
  82. xtgeo/include/eigen3/Eigen/src/Core/DiagonalMatrix.h +0 -391
  83. xtgeo/include/eigen3/Eigen/src/Core/DiagonalProduct.h +0 -28
  84. xtgeo/include/eigen3/Eigen/src/Core/Dot.h +0 -318
  85. xtgeo/include/eigen3/Eigen/src/Core/EigenBase.h +0 -160
  86. xtgeo/include/eigen3/Eigen/src/Core/ForceAlignedAccess.h +0 -150
  87. xtgeo/include/eigen3/Eigen/src/Core/Fuzzy.h +0 -155
  88. xtgeo/include/eigen3/Eigen/src/Core/GeneralProduct.h +0 -465
  89. xtgeo/include/eigen3/Eigen/src/Core/GenericPacketMath.h +0 -1040
  90. xtgeo/include/eigen3/Eigen/src/Core/GlobalFunctions.h +0 -194
  91. xtgeo/include/eigen3/Eigen/src/Core/IO.h +0 -258
  92. xtgeo/include/eigen3/Eigen/src/Core/IndexedView.h +0 -237
  93. xtgeo/include/eigen3/Eigen/src/Core/Inverse.h +0 -117
  94. xtgeo/include/eigen3/Eigen/src/Core/Map.h +0 -171
  95. xtgeo/include/eigen3/Eigen/src/Core/MapBase.h +0 -310
  96. xtgeo/include/eigen3/Eigen/src/Core/MathFunctions.h +0 -2057
  97. xtgeo/include/eigen3/Eigen/src/Core/MathFunctionsImpl.h +0 -200
  98. xtgeo/include/eigen3/Eigen/src/Core/Matrix.h +0 -565
  99. xtgeo/include/eigen3/Eigen/src/Core/MatrixBase.h +0 -547
  100. xtgeo/include/eigen3/Eigen/src/Core/NestByValue.h +0 -85
  101. xtgeo/include/eigen3/Eigen/src/Core/NoAlias.h +0 -109
  102. xtgeo/include/eigen3/Eigen/src/Core/NumTraits.h +0 -335
  103. xtgeo/include/eigen3/Eigen/src/Core/PartialReduxEvaluator.h +0 -232
  104. xtgeo/include/eigen3/Eigen/src/Core/PermutationMatrix.h +0 -605
  105. xtgeo/include/eigen3/Eigen/src/Core/PlainObjectBase.h +0 -1128
  106. xtgeo/include/eigen3/Eigen/src/Core/Product.h +0 -191
  107. xtgeo/include/eigen3/Eigen/src/Core/ProductEvaluators.h +0 -1179
  108. xtgeo/include/eigen3/Eigen/src/Core/Random.h +0 -218
  109. xtgeo/include/eigen3/Eigen/src/Core/Redux.h +0 -515
  110. xtgeo/include/eigen3/Eigen/src/Core/Ref.h +0 -381
  111. xtgeo/include/eigen3/Eigen/src/Core/Replicate.h +0 -142
  112. xtgeo/include/eigen3/Eigen/src/Core/Reshaped.h +0 -454
  113. xtgeo/include/eigen3/Eigen/src/Core/ReturnByValue.h +0 -119
  114. xtgeo/include/eigen3/Eigen/src/Core/Reverse.h +0 -217
  115. xtgeo/include/eigen3/Eigen/src/Core/Select.h +0 -164
  116. xtgeo/include/eigen3/Eigen/src/Core/SelfAdjointView.h +0 -365
  117. xtgeo/include/eigen3/Eigen/src/Core/SelfCwiseBinaryOp.h +0 -47
  118. xtgeo/include/eigen3/Eigen/src/Core/Solve.h +0 -188
  119. xtgeo/include/eigen3/Eigen/src/Core/SolveTriangular.h +0 -235
  120. xtgeo/include/eigen3/Eigen/src/Core/SolverBase.h +0 -168
  121. xtgeo/include/eigen3/Eigen/src/Core/StableNorm.h +0 -251
  122. xtgeo/include/eigen3/Eigen/src/Core/StlIterators.h +0 -463
  123. xtgeo/include/eigen3/Eigen/src/Core/Stride.h +0 -116
  124. xtgeo/include/eigen3/Eigen/src/Core/Swap.h +0 -68
  125. xtgeo/include/eigen3/Eigen/src/Core/Transpose.h +0 -464
  126. xtgeo/include/eigen3/Eigen/src/Core/Transpositions.h +0 -386
  127. xtgeo/include/eigen3/Eigen/src/Core/TriangularMatrix.h +0 -1001
  128. xtgeo/include/eigen3/Eigen/src/Core/VectorBlock.h +0 -96
  129. xtgeo/include/eigen3/Eigen/src/Core/VectorwiseOp.h +0 -784
  130. xtgeo/include/eigen3/Eigen/src/Core/Visitor.h +0 -381
  131. xtgeo/include/eigen3/Eigen/src/Core/arch/AVX/Complex.h +0 -372
  132. xtgeo/include/eigen3/Eigen/src/Core/arch/AVX/MathFunctions.h +0 -228
  133. xtgeo/include/eigen3/Eigen/src/Core/arch/AVX/PacketMath.h +0 -1574
  134. xtgeo/include/eigen3/Eigen/src/Core/arch/AVX/TypeCasting.h +0 -115
  135. xtgeo/include/eigen3/Eigen/src/Core/arch/AVX512/Complex.h +0 -422
  136. xtgeo/include/eigen3/Eigen/src/Core/arch/AVX512/MathFunctions.h +0 -362
  137. xtgeo/include/eigen3/Eigen/src/Core/arch/AVX512/PacketMath.h +0 -2303
  138. xtgeo/include/eigen3/Eigen/src/Core/arch/AVX512/TypeCasting.h +0 -89
  139. xtgeo/include/eigen3/Eigen/src/Core/arch/AltiVec/Complex.h +0 -417
  140. xtgeo/include/eigen3/Eigen/src/Core/arch/AltiVec/MathFunctions.h +0 -90
  141. xtgeo/include/eigen3/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +0 -2937
  142. xtgeo/include/eigen3/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +0 -221
  143. xtgeo/include/eigen3/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +0 -629
  144. xtgeo/include/eigen3/Eigen/src/Core/arch/AltiVec/PacketMath.h +0 -2711
  145. xtgeo/include/eigen3/Eigen/src/Core/arch/CUDA/Complex.h +0 -258
  146. xtgeo/include/eigen3/Eigen/src/Core/arch/Default/BFloat16.h +0 -700
  147. xtgeo/include/eigen3/Eigen/src/Core/arch/Default/ConjHelper.h +0 -117
  148. xtgeo/include/eigen3/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +0 -1649
  149. xtgeo/include/eigen3/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +0 -110
  150. xtgeo/include/eigen3/Eigen/src/Core/arch/Default/Half.h +0 -942
  151. xtgeo/include/eigen3/Eigen/src/Core/arch/Default/Settings.h +0 -49
  152. xtgeo/include/eigen3/Eigen/src/Core/arch/Default/TypeCasting.h +0 -120
  153. xtgeo/include/eigen3/Eigen/src/Core/arch/GPU/MathFunctions.h +0 -103
  154. xtgeo/include/eigen3/Eigen/src/Core/arch/GPU/PacketMath.h +0 -1685
  155. xtgeo/include/eigen3/Eigen/src/Core/arch/GPU/TypeCasting.h +0 -80
  156. xtgeo/include/eigen3/Eigen/src/Core/arch/HIP/hcc/math_constants.h +0 -23
  157. xtgeo/include/eigen3/Eigen/src/Core/arch/MSA/Complex.h +0 -648
  158. xtgeo/include/eigen3/Eigen/src/Core/arch/MSA/MathFunctions.h +0 -387
  159. xtgeo/include/eigen3/Eigen/src/Core/arch/MSA/PacketMath.h +0 -1233
  160. xtgeo/include/eigen3/Eigen/src/Core/arch/NEON/Complex.h +0 -584
  161. xtgeo/include/eigen3/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +0 -183
  162. xtgeo/include/eigen3/Eigen/src/Core/arch/NEON/MathFunctions.h +0 -75
  163. xtgeo/include/eigen3/Eigen/src/Core/arch/NEON/PacketMath.h +0 -4587
  164. xtgeo/include/eigen3/Eigen/src/Core/arch/NEON/TypeCasting.h +0 -1419
  165. xtgeo/include/eigen3/Eigen/src/Core/arch/SSE/Complex.h +0 -351
  166. xtgeo/include/eigen3/Eigen/src/Core/arch/SSE/MathFunctions.h +0 -199
  167. xtgeo/include/eigen3/Eigen/src/Core/arch/SSE/PacketMath.h +0 -1505
  168. xtgeo/include/eigen3/Eigen/src/Core/arch/SSE/TypeCasting.h +0 -142
  169. xtgeo/include/eigen3/Eigen/src/Core/arch/SVE/MathFunctions.h +0 -44
  170. xtgeo/include/eigen3/Eigen/src/Core/arch/SVE/PacketMath.h +0 -752
  171. xtgeo/include/eigen3/Eigen/src/Core/arch/SVE/TypeCasting.h +0 -49
  172. xtgeo/include/eigen3/Eigen/src/Core/arch/SYCL/InteropHeaders.h +0 -232
  173. xtgeo/include/eigen3/Eigen/src/Core/arch/SYCL/MathFunctions.h +0 -301
  174. xtgeo/include/eigen3/Eigen/src/Core/arch/SYCL/PacketMath.h +0 -670
  175. xtgeo/include/eigen3/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +0 -694
  176. xtgeo/include/eigen3/Eigen/src/Core/arch/SYCL/TypeCasting.h +0 -85
  177. xtgeo/include/eigen3/Eigen/src/Core/arch/ZVector/Complex.h +0 -426
  178. xtgeo/include/eigen3/Eigen/src/Core/arch/ZVector/MathFunctions.h +0 -233
  179. xtgeo/include/eigen3/Eigen/src/Core/arch/ZVector/PacketMath.h +0 -1060
  180. xtgeo/include/eigen3/Eigen/src/Core/functors/AssignmentFunctors.h +0 -177
  181. xtgeo/include/eigen3/Eigen/src/Core/functors/BinaryFunctors.h +0 -541
  182. xtgeo/include/eigen3/Eigen/src/Core/functors/NullaryFunctors.h +0 -189
  183. xtgeo/include/eigen3/Eigen/src/Core/functors/StlFunctors.h +0 -166
  184. xtgeo/include/eigen3/Eigen/src/Core/functors/TernaryFunctors.h +0 -25
  185. xtgeo/include/eigen3/Eigen/src/Core/functors/UnaryFunctors.h +0 -1131
  186. xtgeo/include/eigen3/Eigen/src/Core/products/GeneralBlockPanelKernel.h +0 -2645
  187. xtgeo/include/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix.h +0 -517
  188. xtgeo/include/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +0 -317
  189. xtgeo/include/eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +0 -145
  190. xtgeo/include/eigen3/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +0 -124
  191. xtgeo/include/eigen3/Eigen/src/Core/products/GeneralMatrixVector.h +0 -518
  192. xtgeo/include/eigen3/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +0 -136
  193. xtgeo/include/eigen3/Eigen/src/Core/products/Parallelizer.h +0 -180
  194. xtgeo/include/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +0 -544
  195. xtgeo/include/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +0 -295
  196. xtgeo/include/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector.h +0 -262
  197. xtgeo/include/eigen3/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +0 -118
  198. xtgeo/include/eigen3/Eigen/src/Core/products/SelfadjointProduct.h +0 -133
  199. xtgeo/include/eigen3/Eigen/src/Core/products/SelfadjointRank2Update.h +0 -94
  200. xtgeo/include/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix.h +0 -472
  201. xtgeo/include/eigen3/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +0 -317
  202. xtgeo/include/eigen3/Eigen/src/Core/products/TriangularMatrixVector.h +0 -350
  203. xtgeo/include/eigen3/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +0 -255
  204. xtgeo/include/eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h +0 -337
  205. xtgeo/include/eigen3/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +0 -167
  206. xtgeo/include/eigen3/Eigen/src/Core/products/TriangularSolverVector.h +0 -148
  207. xtgeo/include/eigen3/Eigen/src/Core/util/BlasUtil.h +0 -583
  208. xtgeo/include/eigen3/Eigen/src/Core/util/ConfigureVectorization.h +0 -512
  209. xtgeo/include/eigen3/Eigen/src/Core/util/Constants.h +0 -563
  210. xtgeo/include/eigen3/Eigen/src/Core/util/DisableStupidWarnings.h +0 -106
  211. xtgeo/include/eigen3/Eigen/src/Core/util/ForwardDeclarations.h +0 -322
  212. xtgeo/include/eigen3/Eigen/src/Core/util/IndexedViewHelper.h +0 -186
  213. xtgeo/include/eigen3/Eigen/src/Core/util/IntegralConstant.h +0 -272
  214. xtgeo/include/eigen3/Eigen/src/Core/util/MKL_support.h +0 -137
  215. xtgeo/include/eigen3/Eigen/src/Core/util/Macros.h +0 -1464
  216. xtgeo/include/eigen3/Eigen/src/Core/util/Memory.h +0 -1163
  217. xtgeo/include/eigen3/Eigen/src/Core/util/Meta.h +0 -812
  218. xtgeo/include/eigen3/Eigen/src/Core/util/NonMPL2.h +0 -3
  219. xtgeo/include/eigen3/Eigen/src/Core/util/ReenableStupidWarnings.h +0 -31
  220. xtgeo/include/eigen3/Eigen/src/Core/util/ReshapedHelper.h +0 -51
  221. xtgeo/include/eigen3/Eigen/src/Core/util/StaticAssert.h +0 -221
  222. xtgeo/include/eigen3/Eigen/src/Core/util/SymbolicIndex.h +0 -293
  223. xtgeo/include/eigen3/Eigen/src/Core/util/XprHelper.h +0 -856
  224. xtgeo/include/eigen3/Eigen/src/Eigenvalues/ComplexEigenSolver.h +0 -346
  225. xtgeo/include/eigen3/Eigen/src/Eigenvalues/ComplexSchur.h +0 -462
  226. xtgeo/include/eigen3/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +0 -91
  227. xtgeo/include/eigen3/Eigen/src/Eigenvalues/EigenSolver.h +0 -622
  228. xtgeo/include/eigen3/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +0 -418
  229. xtgeo/include/eigen3/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +0 -226
  230. xtgeo/include/eigen3/Eigen/src/Eigenvalues/HessenbergDecomposition.h +0 -374
  231. xtgeo/include/eigen3/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +0 -158
  232. xtgeo/include/eigen3/Eigen/src/Eigenvalues/RealQZ.h +0 -657
  233. xtgeo/include/eigen3/Eigen/src/Eigenvalues/RealSchur.h +0 -558
  234. xtgeo/include/eigen3/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +0 -77
  235. xtgeo/include/eigen3/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +0 -904
  236. xtgeo/include/eigen3/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +0 -87
  237. xtgeo/include/eigen3/Eigen/src/Eigenvalues/Tridiagonalization.h +0 -561
  238. xtgeo/include/eigen3/Eigen/src/Geometry/AlignedBox.h +0 -486
  239. xtgeo/include/eigen3/Eigen/src/Geometry/AngleAxis.h +0 -247
  240. xtgeo/include/eigen3/Eigen/src/Geometry/EulerAngles.h +0 -114
  241. xtgeo/include/eigen3/Eigen/src/Geometry/Homogeneous.h +0 -501
  242. xtgeo/include/eigen3/Eigen/src/Geometry/Hyperplane.h +0 -282
  243. xtgeo/include/eigen3/Eigen/src/Geometry/OrthoMethods.h +0 -235
  244. xtgeo/include/eigen3/Eigen/src/Geometry/ParametrizedLine.h +0 -232
  245. xtgeo/include/eigen3/Eigen/src/Geometry/Quaternion.h +0 -870
  246. xtgeo/include/eigen3/Eigen/src/Geometry/Rotation2D.h +0 -199
  247. xtgeo/include/eigen3/Eigen/src/Geometry/RotationBase.h +0 -206
  248. xtgeo/include/eigen3/Eigen/src/Geometry/Scaling.h +0 -188
  249. xtgeo/include/eigen3/Eigen/src/Geometry/Transform.h +0 -1563
  250. xtgeo/include/eigen3/Eigen/src/Geometry/Translation.h +0 -202
  251. xtgeo/include/eigen3/Eigen/src/Geometry/Umeyama.h +0 -166
  252. xtgeo/include/eigen3/Eigen/src/Geometry/arch/Geometry_SIMD.h +0 -168
  253. xtgeo/include/eigen3/Eigen/src/Householder/BlockHouseholder.h +0 -110
  254. xtgeo/include/eigen3/Eigen/src/Householder/Householder.h +0 -176
  255. xtgeo/include/eigen3/Eigen/src/Householder/HouseholderSequence.h +0 -545
  256. xtgeo/include/eigen3/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +0 -226
  257. xtgeo/include/eigen3/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +0 -212
  258. xtgeo/include/eigen3/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +0 -229
  259. xtgeo/include/eigen3/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +0 -394
  260. xtgeo/include/eigen3/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +0 -453
  261. xtgeo/include/eigen3/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +0 -444
  262. xtgeo/include/eigen3/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +0 -198
  263. xtgeo/include/eigen3/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +0 -117
  264. xtgeo/include/eigen3/Eigen/src/Jacobi/Jacobi.h +0 -483
  265. xtgeo/include/eigen3/Eigen/src/KLUSupport/KLUSupport.h +0 -358
  266. xtgeo/include/eigen3/Eigen/src/LU/Determinant.h +0 -117
  267. xtgeo/include/eigen3/Eigen/src/LU/FullPivLU.h +0 -877
  268. xtgeo/include/eigen3/Eigen/src/LU/InverseImpl.h +0 -432
  269. xtgeo/include/eigen3/Eigen/src/LU/PartialPivLU.h +0 -624
  270. xtgeo/include/eigen3/Eigen/src/LU/PartialPivLU_LAPACKE.h +0 -83
  271. xtgeo/include/eigen3/Eigen/src/LU/arch/InverseSize4.h +0 -351
  272. xtgeo/include/eigen3/Eigen/src/MetisSupport/MetisSupport.h +0 -137
  273. xtgeo/include/eigen3/Eigen/src/OrderingMethods/Amd.h +0 -435
  274. xtgeo/include/eigen3/Eigen/src/OrderingMethods/Eigen_Colamd.h +0 -1863
  275. xtgeo/include/eigen3/Eigen/src/OrderingMethods/Ordering.h +0 -153
  276. xtgeo/include/eigen3/Eigen/src/PaStiXSupport/PaStiXSupport.h +0 -678
  277. xtgeo/include/eigen3/Eigen/src/PardisoSupport/PardisoSupport.h +0 -545
  278. xtgeo/include/eigen3/Eigen/src/QR/ColPivHouseholderQR.h +0 -674
  279. xtgeo/include/eigen3/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +0 -97
  280. xtgeo/include/eigen3/Eigen/src/QR/CompleteOrthogonalDecomposition.h +0 -635
  281. xtgeo/include/eigen3/Eigen/src/QR/FullPivHouseholderQR.h +0 -713
  282. xtgeo/include/eigen3/Eigen/src/QR/HouseholderQR.h +0 -434
  283. xtgeo/include/eigen3/Eigen/src/QR/HouseholderQR_LAPACKE.h +0 -68
  284. xtgeo/include/eigen3/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +0 -335
  285. xtgeo/include/eigen3/Eigen/src/SVD/BDCSVD.h +0 -1366
  286. xtgeo/include/eigen3/Eigen/src/SVD/JacobiSVD.h +0 -812
  287. xtgeo/include/eigen3/Eigen/src/SVD/JacobiSVD_LAPACKE.h +0 -91
  288. xtgeo/include/eigen3/Eigen/src/SVD/SVDBase.h +0 -376
  289. xtgeo/include/eigen3/Eigen/src/SVD/UpperBidiagonalization.h +0 -414
  290. xtgeo/include/eigen3/Eigen/src/SparseCholesky/SimplicialCholesky.h +0 -697
  291. xtgeo/include/eigen3/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +0 -174
  292. xtgeo/include/eigen3/Eigen/src/SparseCore/AmbiVector.h +0 -378
  293. xtgeo/include/eigen3/Eigen/src/SparseCore/CompressedStorage.h +0 -274
  294. xtgeo/include/eigen3/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +0 -352
  295. xtgeo/include/eigen3/Eigen/src/SparseCore/MappedSparseMatrix.h +0 -67
  296. xtgeo/include/eigen3/Eigen/src/SparseCore/SparseAssign.h +0 -270
  297. xtgeo/include/eigen3/Eigen/src/SparseCore/SparseBlock.h +0 -571
  298. xtgeo/include/eigen3/Eigen/src/SparseCore/SparseColEtree.h +0 -206
  299. xtgeo/include/eigen3/Eigen/src/SparseCore/SparseCompressedBase.h +0 -370
  300. xtgeo/include/eigen3/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +0 -722
  301. xtgeo/include/eigen3/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +0 -150
  302. xtgeo/include/eigen3/Eigen/src/SparseCore/SparseDenseProduct.h +0 -342
  303. xtgeo/include/eigen3/Eigen/src/SparseCore/SparseDiagonalProduct.h +0 -138
  304. xtgeo/include/eigen3/Eigen/src/SparseCore/SparseDot.h +0 -98
  305. xtgeo/include/eigen3/Eigen/src/SparseCore/SparseFuzzy.h +0 -29
  306. xtgeo/include/eigen3/Eigen/src/SparseCore/SparseMap.h +0 -305
  307. xtgeo/include/eigen3/Eigen/src/SparseCore/SparseMatrix.h +0 -1518
  308. xtgeo/include/eigen3/Eigen/src/SparseCore/SparseMatrixBase.h +0 -398
  309. xtgeo/include/eigen3/Eigen/src/SparseCore/SparsePermutation.h +0 -178
  310. xtgeo/include/eigen3/Eigen/src/SparseCore/SparseProduct.h +0 -181
  311. xtgeo/include/eigen3/Eigen/src/SparseCore/SparseRedux.h +0 -49
  312. xtgeo/include/eigen3/Eigen/src/SparseCore/SparseRef.h +0 -397
  313. xtgeo/include/eigen3/Eigen/src/SparseCore/SparseSelfAdjointView.h +0 -659
  314. xtgeo/include/eigen3/Eigen/src/SparseCore/SparseSolverBase.h +0 -124
  315. xtgeo/include/eigen3/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +0 -198
  316. xtgeo/include/eigen3/Eigen/src/SparseCore/SparseTranspose.h +0 -92
  317. xtgeo/include/eigen3/Eigen/src/SparseCore/SparseTriangularView.h +0 -189
  318. xtgeo/include/eigen3/Eigen/src/SparseCore/SparseUtil.h +0 -186
  319. xtgeo/include/eigen3/Eigen/src/SparseCore/SparseVector.h +0 -478
  320. xtgeo/include/eigen3/Eigen/src/SparseCore/SparseView.h +0 -254
  321. xtgeo/include/eigen3/Eigen/src/SparseCore/TriangularSolver.h +0 -315
  322. xtgeo/include/eigen3/Eigen/src/SparseLU/SparseLU.h +0 -923
  323. xtgeo/include/eigen3/Eigen/src/SparseLU/SparseLUImpl.h +0 -66
  324. xtgeo/include/eigen3/Eigen/src/SparseLU/SparseLU_Memory.h +0 -226
  325. xtgeo/include/eigen3/Eigen/src/SparseLU/SparseLU_Structs.h +0 -110
  326. xtgeo/include/eigen3/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +0 -375
  327. xtgeo/include/eigen3/Eigen/src/SparseLU/SparseLU_Utils.h +0 -80
  328. xtgeo/include/eigen3/Eigen/src/SparseLU/SparseLU_column_bmod.h +0 -181
  329. xtgeo/include/eigen3/Eigen/src/SparseLU/SparseLU_column_dfs.h +0 -179
  330. xtgeo/include/eigen3/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +0 -107
  331. xtgeo/include/eigen3/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +0 -280
  332. xtgeo/include/eigen3/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +0 -126
  333. xtgeo/include/eigen3/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +0 -130
  334. xtgeo/include/eigen3/Eigen/src/SparseLU/SparseLU_panel_bmod.h +0 -223
  335. xtgeo/include/eigen3/Eigen/src/SparseLU/SparseLU_panel_dfs.h +0 -258
  336. xtgeo/include/eigen3/Eigen/src/SparseLU/SparseLU_pivotL.h +0 -137
  337. xtgeo/include/eigen3/Eigen/src/SparseLU/SparseLU_pruneL.h +0 -136
  338. xtgeo/include/eigen3/Eigen/src/SparseLU/SparseLU_relax_snode.h +0 -83
  339. xtgeo/include/eigen3/Eigen/src/SparseQR/SparseQR.h +0 -758
  340. xtgeo/include/eigen3/Eigen/src/StlSupport/StdDeque.h +0 -116
  341. xtgeo/include/eigen3/Eigen/src/StlSupport/StdList.h +0 -106
  342. xtgeo/include/eigen3/Eigen/src/StlSupport/StdVector.h +0 -131
  343. xtgeo/include/eigen3/Eigen/src/StlSupport/details.h +0 -84
  344. xtgeo/include/eigen3/Eigen/src/SuperLUSupport/SuperLUSupport.h +0 -1025
  345. xtgeo/include/eigen3/Eigen/src/UmfPackSupport/UmfPackSupport.h +0 -642
  346. xtgeo/include/eigen3/Eigen/src/misc/Image.h +0 -82
  347. xtgeo/include/eigen3/Eigen/src/misc/Kernel.h +0 -79
  348. xtgeo/include/eigen3/Eigen/src/misc/RealSvd2x2.h +0 -55
  349. xtgeo/include/eigen3/Eigen/src/misc/blas.h +0 -440
  350. xtgeo/include/eigen3/Eigen/src/misc/lapack.h +0 -152
  351. xtgeo/include/eigen3/Eigen/src/misc/lapacke.h +0 -16292
  352. xtgeo/include/eigen3/Eigen/src/misc/lapacke_mangling.h +0 -17
  353. xtgeo/include/eigen3/Eigen/src/plugins/ArrayCwiseBinaryOps.h +0 -358
  354. xtgeo/include/eigen3/Eigen/src/plugins/ArrayCwiseUnaryOps.h +0 -696
  355. xtgeo/include/eigen3/Eigen/src/plugins/BlockMethods.h +0 -1442
  356. xtgeo/include/eigen3/Eigen/src/plugins/CommonCwiseBinaryOps.h +0 -115
  357. xtgeo/include/eigen3/Eigen/src/plugins/CommonCwiseUnaryOps.h +0 -177
  358. xtgeo/include/eigen3/Eigen/src/plugins/IndexedViewMethods.h +0 -262
  359. xtgeo/include/eigen3/Eigen/src/plugins/MatrixCwiseBinaryOps.h +0 -152
  360. xtgeo/include/eigen3/Eigen/src/plugins/MatrixCwiseUnaryOps.h +0 -95
  361. xtgeo/include/eigen3/Eigen/src/plugins/ReshapedMethods.h +0 -149
  362. xtgeo/include/eigen3/signature_of_eigen3_matrix_library +0 -1
  363. xtgeo/include/eigen3/unsupported/Eigen/AdolcForward +0 -159
  364. xtgeo/include/eigen3/unsupported/Eigen/AlignedVector3 +0 -234
  365. xtgeo/include/eigen3/unsupported/Eigen/ArpackSupport +0 -30
  366. xtgeo/include/eigen3/unsupported/Eigen/AutoDiff +0 -46
  367. xtgeo/include/eigen3/unsupported/Eigen/BVH +0 -95
  368. xtgeo/include/eigen3/unsupported/Eigen/CXX11/Tensor +0 -137
  369. xtgeo/include/eigen3/unsupported/Eigen/CXX11/TensorSymmetry +0 -42
  370. xtgeo/include/eigen3/unsupported/Eigen/CXX11/ThreadPool +0 -74
  371. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +0 -554
  372. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h +0 -329
  373. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +0 -247
  374. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +0 -1176
  375. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h +0 -1559
  376. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +0 -1093
  377. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +0 -518
  378. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h +0 -377
  379. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +0 -1023
  380. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h +0 -73
  381. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h +0 -6
  382. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h +0 -1413
  383. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h +0 -575
  384. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h +0 -1650
  385. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +0 -1679
  386. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +0 -456
  387. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +0 -1132
  388. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h +0 -544
  389. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h +0 -214
  390. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h +0 -347
  391. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h +0 -137
  392. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h +0 -6
  393. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h +0 -104
  394. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h +0 -389
  395. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h +0 -1048
  396. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +0 -409
  397. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h +0 -236
  398. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +0 -490
  399. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +0 -236
  400. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +0 -983
  401. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +0 -703
  402. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h +0 -388
  403. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +0 -669
  404. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h +0 -379
  405. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +0 -237
  406. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +0 -191
  407. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +0 -488
  408. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h +0 -302
  409. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h +0 -33
  410. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h +0 -99
  411. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaUndefines.h +0 -44
  412. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h +0 -79
  413. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h +0 -603
  414. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h +0 -738
  415. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h +0 -247
  416. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h +0 -82
  417. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +0 -263
  418. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h +0 -216
  419. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h +0 -98
  420. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +0 -327
  421. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +0 -311
  422. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +0 -1102
  423. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +0 -708
  424. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h +0 -291
  425. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h +0 -322
  426. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +0 -998
  427. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +0 -6
  428. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h +0 -966
  429. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h +0 -582
  430. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h +0 -454
  431. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h +0 -465
  432. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +0 -528
  433. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorScanSycl.h +0 -513
  434. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +0 -471
  435. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h +0 -161
  436. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h +0 -346
  437. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h +0 -303
  438. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +0 -264
  439. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h +0 -249
  440. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h +0 -629
  441. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h +0 -293
  442. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h +0 -236
  443. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h +0 -338
  444. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h +0 -669
  445. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h +0 -67
  446. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h +0 -249
  447. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h +0 -486
  448. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h +0 -236
  449. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadCancel.h +0 -23
  450. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h +0 -40
  451. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h +0 -301
  452. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h +0 -48
  453. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadYield.h +0 -20
  454. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/util/CXX11Meta.h +0 -537
  455. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h +0 -88
  456. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/util/EmulateArray.h +0 -261
  457. xtgeo/include/eigen3/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h +0 -158
  458. xtgeo/include/eigen3/unsupported/Eigen/EulerAngles +0 -43
  459. xtgeo/include/eigen3/unsupported/Eigen/FFT +0 -419
  460. xtgeo/include/eigen3/unsupported/Eigen/IterativeSolvers +0 -51
  461. xtgeo/include/eigen3/unsupported/Eigen/KroneckerProduct +0 -36
  462. xtgeo/include/eigen3/unsupported/Eigen/LevenbergMarquardt +0 -49
  463. xtgeo/include/eigen3/unsupported/Eigen/MPRealSupport +0 -213
  464. xtgeo/include/eigen3/unsupported/Eigen/MatrixFunctions +0 -504
  465. xtgeo/include/eigen3/unsupported/Eigen/MoreVectorization +0 -24
  466. xtgeo/include/eigen3/unsupported/Eigen/NonLinearOptimization +0 -140
  467. xtgeo/include/eigen3/unsupported/Eigen/NumericalDiff +0 -56
  468. xtgeo/include/eigen3/unsupported/Eigen/OpenGLSupport +0 -322
  469. xtgeo/include/eigen3/unsupported/Eigen/Polynomials +0 -137
  470. xtgeo/include/eigen3/unsupported/Eigen/Skyline +0 -39
  471. xtgeo/include/eigen3/unsupported/Eigen/SparseExtra +0 -54
  472. xtgeo/include/eigen3/unsupported/Eigen/SpecialFunctions +0 -103
  473. xtgeo/include/eigen3/unsupported/Eigen/Splines +0 -35
  474. xtgeo/include/eigen3/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h +0 -108
  475. xtgeo/include/eigen3/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +0 -730
  476. xtgeo/include/eigen3/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h +0 -220
  477. xtgeo/include/eigen3/unsupported/Eigen/src/BVH/BVAlgorithms.h +0 -293
  478. xtgeo/include/eigen3/unsupported/Eigen/src/BVH/KdBVH.h +0 -223
  479. xtgeo/include/eigen3/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h +0 -790
  480. xtgeo/include/eigen3/unsupported/Eigen/src/EulerAngles/EulerAngles.h +0 -355
  481. xtgeo/include/eigen3/unsupported/Eigen/src/EulerAngles/EulerSystem.h +0 -305
  482. xtgeo/include/eigen3/unsupported/Eigen/src/FFT/ei_fftw_impl.h +0 -261
  483. xtgeo/include/eigen3/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +0 -449
  484. xtgeo/include/eigen3/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h +0 -187
  485. xtgeo/include/eigen3/unsupported/Eigen/src/IterativeSolvers/DGMRES.h +0 -511
  486. xtgeo/include/eigen3/unsupported/Eigen/src/IterativeSolvers/GMRES.h +0 -335
  487. xtgeo/include/eigen3/unsupported/Eigen/src/IterativeSolvers/IDRS.h +0 -436
  488. xtgeo/include/eigen3/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h +0 -90
  489. xtgeo/include/eigen3/unsupported/Eigen/src/IterativeSolvers/IterationController.h +0 -154
  490. xtgeo/include/eigen3/unsupported/Eigen/src/IterativeSolvers/MINRES.h +0 -267
  491. xtgeo/include/eigen3/unsupported/Eigen/src/IterativeSolvers/Scaling.h +0 -193
  492. xtgeo/include/eigen3/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h +0 -305
  493. xtgeo/include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LMcovar.h +0 -84
  494. xtgeo/include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LMonestep.h +0 -202
  495. xtgeo/include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LMpar.h +0 -160
  496. xtgeo/include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h +0 -188
  497. xtgeo/include/eigen3/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h +0 -396
  498. xtgeo/include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h +0 -441
  499. xtgeo/include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h +0 -569
  500. xtgeo/include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h +0 -373
  501. xtgeo/include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h +0 -705
  502. xtgeo/include/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h +0 -368
  503. xtgeo/include/eigen3/unsupported/Eigen/src/MatrixFunctions/StemFunction.h +0 -117
  504. xtgeo/include/eigen3/unsupported/Eigen/src/MoreVectorization/MathFunctions.h +0 -95
  505. xtgeo/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h +0 -601
  506. xtgeo/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h +0 -657
  507. xtgeo/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/chkder.h +0 -66
  508. xtgeo/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/covar.h +0 -70
  509. xtgeo/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/dogleg.h +0 -107
  510. xtgeo/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/fdjac1.h +0 -79
  511. xtgeo/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/lmpar.h +0 -298
  512. xtgeo/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h +0 -91
  513. xtgeo/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/r1mpyq.h +0 -30
  514. xtgeo/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/r1updt.h +0 -99
  515. xtgeo/include/eigen3/unsupported/Eigen/src/NonLinearOptimization/rwupdt.h +0 -49
  516. xtgeo/include/eigen3/unsupported/Eigen/src/NumericalDiff/NumericalDiff.h +0 -130
  517. xtgeo/include/eigen3/unsupported/Eigen/src/Polynomials/Companion.h +0 -280
  518. xtgeo/include/eigen3/unsupported/Eigen/src/Polynomials/PolynomialSolver.h +0 -428
  519. xtgeo/include/eigen3/unsupported/Eigen/src/Polynomials/PolynomialUtils.h +0 -143
  520. xtgeo/include/eigen3/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h +0 -352
  521. xtgeo/include/eigen3/unsupported/Eigen/src/Skyline/SkylineMatrix.h +0 -862
  522. xtgeo/include/eigen3/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h +0 -212
  523. xtgeo/include/eigen3/unsupported/Eigen/src/Skyline/SkylineProduct.h +0 -295
  524. xtgeo/include/eigen3/unsupported/Eigen/src/Skyline/SkylineStorage.h +0 -259
  525. xtgeo/include/eigen3/unsupported/Eigen/src/Skyline/SkylineUtil.h +0 -89
  526. xtgeo/include/eigen3/unsupported/Eigen/src/SparseExtra/BlockOfDynamicSparseMatrix.h +0 -122
  527. xtgeo/include/eigen3/unsupported/Eigen/src/SparseExtra/BlockSparseMatrix.h +0 -1079
  528. xtgeo/include/eigen3/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h +0 -404
  529. xtgeo/include/eigen3/unsupported/Eigen/src/SparseExtra/MarketIO.h +0 -282
  530. xtgeo/include/eigen3/unsupported/Eigen/src/SparseExtra/MatrixMarketIterator.h +0 -247
  531. xtgeo/include/eigen3/unsupported/Eigen/src/SparseExtra/RandomSetter.h +0 -349
  532. xtgeo/include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsArrayAPI.h +0 -286
  533. xtgeo/include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsBFloat16.h +0 -68
  534. xtgeo/include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsFunctors.h +0 -357
  535. xtgeo/include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsHalf.h +0 -66
  536. xtgeo/include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsImpl.h +0 -1959
  537. xtgeo/include/eigen3/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsPacketMath.h +0 -118
  538. xtgeo/include/eigen3/unsupported/Eigen/src/SpecialFunctions/HipVectorCompatibility.h +0 -67
  539. xtgeo/include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h +0 -167
  540. xtgeo/include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsBFloat16.h +0 -58
  541. xtgeo/include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h +0 -330
  542. xtgeo/include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h +0 -58
  543. xtgeo/include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h +0 -2045
  544. xtgeo/include/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h +0 -79
  545. xtgeo/include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/AVX/BesselFunctions.h +0 -46
  546. xtgeo/include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/AVX/SpecialFunctions.h +0 -16
  547. xtgeo/include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/BesselFunctions.h +0 -46
  548. xtgeo/include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/SpecialFunctions.h +0 -16
  549. xtgeo/include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/GPU/SpecialFunctions.h +0 -369
  550. xtgeo/include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/NEON/BesselFunctions.h +0 -54
  551. xtgeo/include/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h +0 -34
  552. xtgeo/include/eigen3/unsupported/Eigen/src/Splines/Spline.h +0 -507
  553. xtgeo/include/eigen3/unsupported/Eigen/src/Splines/SplineFitting.h +0 -431
  554. xtgeo/include/eigen3/unsupported/Eigen/src/Splines/SplineFwd.h +0 -93
  555. xtgeo/share/eigen3/cmake/Eigen3Config.cmake +0 -37
  556. xtgeo/share/eigen3/cmake/Eigen3ConfigVersion.cmake +0 -65
  557. xtgeo/share/eigen3/cmake/UseEigen3.cmake +0 -6
  558. xtgeo/share/pkgconfig/eigen3.pc +0 -9
  559. xtgeo-4.10.0.dist-info/RECORD +0 -652
  560. {xtgeo-4.10.0.dist-info → xtgeo-4.10.1.dist-info}/WHEEL +0 -0
  561. {xtgeo-4.10.0.dist-info → xtgeo-4.10.1.dist-info}/licenses/LICENSE.md +0 -0
@@ -1,966 +0,0 @@
1
- // This file is part of Eigen, a lightweight C++ template library
2
- // for linear algebra.
3
- //
4
- // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
5
- //
6
- // This Source Code Form is subject to the terms of the Mozilla
7
- // Public License v. 2.0. If a copy of the MPL was not distributed
8
- // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
-
10
- #ifndef EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_GPU_H
11
- #define EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_GPU_H
12
-
13
- namespace Eigen {
14
- namespace internal {
15
-
16
-
17
- #if defined(EIGEN_USE_GPU) && defined(EIGEN_GPUCC)
18
- // Full reducers for GPU, don't vectorize for now
19
-
20
- // Reducer function that enables multiple gpu thread to safely accumulate at the same
21
- // output address. It basically reads the current value of the output variable, and
22
- // attempts to update it with the new value. If in the meantime another gpu thread
23
- // updated the content of the output address it will try again.
24
- template <typename T, typename R>
25
- __device__ EIGEN_ALWAYS_INLINE void atomicReduce(T* output, T accum, R& reducer) {
26
- #if (defined(EIGEN_HIP_DEVICE_COMPILE) && defined(__HIP_ARCH_HAS_WARP_SHUFFLE__)) || (EIGEN_CUDA_ARCH >= 300)
27
- if (sizeof(T) == 4)
28
- {
29
- unsigned int oldval = *reinterpret_cast<unsigned int*>(output);
30
- unsigned int newval = oldval;
31
- reducer.reduce(accum, reinterpret_cast<T*>(&newval));
32
- if (newval == oldval) {
33
- return;
34
- }
35
- unsigned int readback;
36
- while ((readback = atomicCAS((unsigned int*)output, oldval, newval)) != oldval) {
37
- oldval = readback;
38
- newval = oldval;
39
- reducer.reduce(accum, reinterpret_cast<T*>(&newval));
40
- if (newval == oldval) {
41
- return;
42
- }
43
- }
44
- }
45
- else if (sizeof(T) == 8) {
46
- unsigned long long oldval = *reinterpret_cast<unsigned long long*>(output);
47
- unsigned long long newval = oldval;
48
- reducer.reduce(accum, reinterpret_cast<T*>(&newval));
49
- if (newval == oldval) {
50
- return;
51
- }
52
- unsigned long long readback;
53
- while ((readback = atomicCAS((unsigned long long*)output, oldval, newval)) != oldval) {
54
- oldval = readback;
55
- newval = oldval;
56
- reducer.reduce(accum, reinterpret_cast<T*>(&newval));
57
- if (newval == oldval) {
58
- return;
59
- }
60
- }
61
- }
62
- else {
63
- gpu_assert(0 && "Wordsize not supported");
64
- }
65
- #else // EIGEN_CUDA_ARCH >= 300
66
- gpu_assert(0 && "Shouldn't be called on unsupported device");
67
- #endif // EIGEN_CUDA_ARCH >= 300
68
- }
69
-
70
- // We extend atomicExch to support extra data types
71
- template <typename Type>
72
- __device__ inline Type atomicExchCustom(Type* address, Type val) {
73
- return atomicExch(address, val);
74
- }
75
-
76
- template <>
77
- __device__ inline double atomicExchCustom(double* address, double val) {
78
- unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(address);
79
- return __longlong_as_double(atomicExch(address_as_ull, __double_as_longlong(val)));
80
- }
81
-
82
- #ifdef EIGEN_HAS_GPU_FP16
83
- template <typename R>
84
- __device__ inline void atomicReduce(half2* output, half2 accum, R& reducer) {
85
- unsigned int oldval = *reinterpret_cast<unsigned int*>(output);
86
- unsigned int newval = oldval;
87
- reducer.reducePacket(accum, reinterpret_cast<half2*>(&newval));
88
- if (newval == oldval) {
89
- return;
90
- }
91
- unsigned int readback;
92
- while ((readback = atomicCAS((unsigned int*)output, oldval, newval)) != oldval) {
93
- oldval = readback;
94
- newval = oldval;
95
- reducer.reducePacket(accum, reinterpret_cast<half2*>(&newval));
96
- if (newval == oldval) {
97
- return;
98
- }
99
- }
100
- }
101
- // reduction should be associative since reduction is not atomic in wide vector but atomic in half2 operations
102
- template <typename R>
103
- __device__ inline void atomicReduce(Packet4h2* output, Packet4h2 accum, R& reducer) {
104
- half2* houtput=reinterpret_cast<half2*>(output);
105
- half2* haccum=reinterpret_cast<half2*>(&accum);
106
- for(int i=0;i<4;++i){
107
- atomicReduce(houtput+i,*(haccum+i),reducer);
108
- }
109
- }
110
- #endif // EIGEN_HAS_GPU_FP16
111
-
112
- template <>
113
- __device__ inline void atomicReduce(float* output, float accum, SumReducer<float>&) {
114
- #if (defined(EIGEN_HIP_DEVICE_COMPILE) && defined(__HIP_ARCH_HAS_WARP_SHUFFLE__)) || (EIGEN_CUDA_ARCH >= 300)
115
- atomicAdd(output, accum);
116
- #else // EIGEN_CUDA_ARCH >= 300
117
- gpu_assert(0 && "Shouldn't be called on unsupported device");
118
- #endif // EIGEN_CUDA_ARCH >= 300
119
- }
120
-
121
-
122
- template <typename CoeffType, typename Index>
123
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionInitKernel(const CoeffType val, Index num_preserved_coeffs, CoeffType* output) {
124
- const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
125
- const Index num_threads = blockDim.x * gridDim.x;
126
- for (Index i = thread_id; i < num_preserved_coeffs; i += num_threads) {
127
- output[i] = val;
128
- }
129
- }
130
-
131
-
132
- template <int BlockSize, int NumPerThread, typename Self,
133
- typename Reducer, typename Index>
134
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void FullReductionKernel(Reducer reducer, const Self input, Index num_coeffs,
135
- typename Self::CoeffReturnType* output, unsigned int* semaphore) {
136
- #if (defined(EIGEN_HIP_DEVICE_COMPILE) && defined(__HIP_ARCH_HAS_WARP_SHUFFLE__)) || (EIGEN_CUDA_ARCH >= 300)
137
- // Initialize the output value
138
- const Index first_index = blockIdx.x * BlockSize * NumPerThread + threadIdx.x;
139
- if (gridDim.x == 1) {
140
- if (first_index == 0) {
141
- *output = reducer.initialize();
142
- }
143
- }
144
- else {
145
- if (threadIdx.x == 0) {
146
- unsigned int block = atomicCAS(semaphore, 0u, 1u);
147
- if (block == 0) {
148
- // We're the first block to run, initialize the output value
149
- atomicExchCustom(output, reducer.initialize());
150
- __threadfence();
151
- atomicExch(semaphore, 2u);
152
- }
153
- else {
154
- // Wait for the first block to initialize the output value.
155
- // Use atomicCAS here to ensure that the reads aren't cached
156
- unsigned int val;
157
- do {
158
- val = atomicCAS(semaphore, 2u, 2u);
159
- }
160
- while (val < 2u);
161
- }
162
- }
163
- }
164
-
165
- __syncthreads();
166
-
167
- eigen_assert(gridDim.x == 1 || *semaphore >= 2u);
168
-
169
- typename Self::CoeffReturnType accum = reducer.initialize();
170
- Index max_iter = numext::mini<Index>(num_coeffs - first_index, NumPerThread*BlockSize);
171
- for (Index i = 0; i < max_iter; i+=BlockSize) {
172
- const Index index = first_index + i;
173
- eigen_assert(index < num_coeffs);
174
- typename Self::CoeffReturnType val = input.m_impl.coeff(index);
175
- reducer.reduce(val, &accum);
176
- }
177
-
178
- #pragma unroll
179
- for (int offset = warpSize/2; offset > 0; offset /= 2) {
180
- #if defined(EIGEN_HIPCC)
181
- // use std::is_floating_point to determine the type of reduced_val
182
- // This is needed because when Type == double, hipcc will give a "call to __shfl_down is ambguous" error
183
- // and list the float and int versions of __shfl_down as the candidate functions.
184
- if (std::is_floating_point<typename Self::CoeffReturnType>::value) {
185
- reducer.reduce(__shfl_down(static_cast<float>(accum), offset, warpSize), &accum);
186
- } else {
187
- reducer.reduce(__shfl_down(static_cast<int>(accum), offset, warpSize), &accum);
188
- }
189
- #elif defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000
190
- reducer.reduce(__shfl_down(accum, offset, warpSize), &accum);
191
- #else
192
- reducer.reduce(__shfl_down_sync(0xFFFFFFFF, accum, offset, warpSize), &accum);
193
- #endif
194
- }
195
-
196
- if ((threadIdx.x & (warpSize - 1)) == 0) {
197
- atomicReduce(output, accum, reducer);
198
- }
199
-
200
- if (gridDim.x > 1 && threadIdx.x == 0) {
201
- // Let the last block reset the semaphore
202
- atomicInc(semaphore, gridDim.x + 1);
203
- #if defined(EIGEN_HIPCC)
204
- __threadfence_system();
205
- #endif
206
- }
207
- #else // EIGEN_CUDA_ARCH >= 300
208
- gpu_assert(0 && "Shouldn't be called on unsupported device");
209
- #endif // EIGEN_CUDA_ARCH >= 300
210
- }
211
-
212
-
213
- #ifdef EIGEN_HAS_GPU_FP16
214
- template <typename Self,
215
- typename Reducer, typename Index>
216
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionInitFullReduxKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs,
217
- packet_traits<Eigen::half>::type* scratch) {
218
- eigen_assert(blockDim.x == 1);
219
- eigen_assert(gridDim.x == 1);
220
- typedef packet_traits<Eigen::half>::type packet_type;
221
- Index packet_remainder =
222
- num_coeffs % Index(unpacket_traits<packet_type>::size);
223
- if (packet_remainder != 0) {
224
- half2* h2scratch = reinterpret_cast<half2*>(scratch);
225
- for (Index i = num_coeffs - packet_remainder; i + 2 <= num_coeffs; i += 2) {
226
- *h2scratch =
227
- __halves2half2(input.m_impl.coeff(i), input.m_impl.coeff(i + 1));
228
- h2scratch++;
229
- }
230
- if ((num_coeffs & 1) != 0) {
231
- half lastCoeff = input.m_impl.coeff(num_coeffs - 1);
232
- *h2scratch = __halves2half2(lastCoeff, reducer.initialize());
233
- }
234
- } else {
235
- *scratch = reducer.template initializePacket<packet_type>();
236
- }
237
- }
238
-
239
- template <typename Self,
240
- typename Reducer, typename Index>
241
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionInitKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs, half* output) {
242
- const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
243
- const Index num_threads = blockDim.x * gridDim.x;
244
- typedef typename packet_traits<Eigen::half>::type PacketType;
245
-
246
- const Index num_packets =
247
- num_coeffs / Index(unpacket_traits<PacketType>::size);
248
- PacketType* p_output = reinterpret_cast<PacketType*>(output);
249
- for (Index i = thread_id; i < num_packets; i += num_threads) {
250
- p_output[i] = reducer.template initializePacket<PacketType>();
251
- }
252
- Index packet_remainder =
253
- num_coeffs % Index(unpacket_traits<PacketType>::size);
254
- if (thread_id < packet_remainder) {
255
- output[num_coeffs - packet_remainder + thread_id] = reducer.initialize();
256
- }
257
- }
258
-
259
- template <int BlockSize, int NumPerThread, typename Self,
260
- typename Reducer, typename Index>
261
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void FullReductionKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs,
- half* output, packet_traits<Eigen::half>::type* scratch) {
- typedef typename packet_traits<Eigen::half>::type PacketType;
- const int packet_width = unpacket_traits<PacketType>::size;
- eigen_assert(NumPerThread % packet_width == 0);
- const Index first_index =
- blockIdx.x * BlockSize * NumPerThread + packet_width * threadIdx.x;
-
- // Initialize the output value if it wasn't initialized by the ReductionInitKernel
-
- if (gridDim.x == 1) {
- if (first_index == 0) {
- int rem = num_coeffs % packet_width;
- if (rem != 0) {
- half2* p_scratch = reinterpret_cast<half2*>(scratch);
- *scratch = reducer.template initializePacket<PacketType>();
- for (int i = 0; i < rem / 2; i++) {
- *p_scratch = __halves2half2(
- input.m_impl.coeff(num_coeffs - packet_width + 2 * i),
- input.m_impl.coeff(num_coeffs - packet_width + 2 * i + 1));
- p_scratch++;
- }
- if ((num_coeffs & 1) != 0) {
- half last = input.m_impl.coeff(num_coeffs - 1);
- *p_scratch = __halves2half2(last, reducer.initialize());
- }
- } else {
- *scratch = reducer.template initializePacket<PacketType>();
- }
- }
- __syncthreads();
- }
-
- PacketType accum = reducer.template initializePacket<PacketType>();
- const Index max_iter =
- numext::mini<Index>((num_coeffs - first_index) / packet_width,
- NumPerThread * BlockSize / packet_width);
- for (Index i = 0; i < max_iter; i += BlockSize) {
- const Index index = first_index + packet_width * i;
- eigen_assert(index + packet_width < num_coeffs);
- PacketType val = input.m_impl.template packet<Unaligned>(index);
- reducer.reducePacket(val, &accum);
- }
-
- #pragma unroll
- for (int offset = warpSize/2; offset > 0; offset /= 2) {
- #if defined(EIGEN_HIPCC)
- PacketType r1;
- half2* hr = reinterpret_cast<half2*>(&r1);
- half2* hacc = reinterpret_cast<half2*>(&accum);
- for (int i = 0; i < packet_width / 2; i++) {
- // FIXME : remove this workaround once we have native half/half2 support for __shfl_down
- union { int i; half2 h; } wka_in, wka_out;
- wka_in.h = hacc[i];
- wka_out.i = __shfl_down(wka_in.i, offset, warpSize);
- hr[i] = wka_out.h;
- }
- reducer.reducePacket(r1, &accum);
- #elif defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000
- PacketType r1;
- half2* hr = reinterpret_cast<half2*>(&r1);
- half2* hacc = reinterpret_cast<half2*>(&accum);
- for (int i = 0; i < packet_width / 2; i++) {
- hr[i] = __shfl_down(hacc[i], offset, warpSize);
- }
- reducer.reducePacket(r1, &accum);
- #else
- PacketType r1;
- half2* hr = reinterpret_cast<half2*>(&r1);
- half2* hacc = reinterpret_cast<half2*>(&accum);
- for (int i = 0; i < packet_width / 2; i++) {
- hr[i] = __shfl_down_sync(0xFFFFFFFF, hacc[i], (unsigned)offset, warpSize);
- }
- reducer.reducePacket(r1, &accum);
-
- #endif
- }
-
- if ((threadIdx.x & (warpSize - 1)) == 0) {
- atomicReduce(scratch, accum, reducer);
- }
-
- __syncthreads();
- half2* rv1 = reinterpret_cast<half2*>(scratch);
- if (packet_width > 2) {
- reducer.reducePacket(rv1[2], rv1);
- reducer.reducePacket(rv1[3], rv1 + 1);
- reducer.reducePacket(rv1[1], rv1);
- }
- if (gridDim.x == 1) {
- if (first_index == 0) {
- half tmp = __low2half(*rv1);
- reducer.reduce(__high2half(*rv1), &tmp);
- *output = tmp;
- }
- }
- }
-
- template <typename Op>
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ReductionCleanupKernelHalfFloat(Op reducer, half* output, packet_traits<Eigen::half>::type* scratch) {
- eigen_assert(threadIdx.x == 1);
- half2* pscratch = reinterpret_cast<half2*>(scratch);
- half tmp = __float2half(0.f);
- typedef packet_traits<Eigen::half>::type packet_type;
- for (int i = 0; i < unpacket_traits<packet_type>::size; i += 2) {
- reducer.reduce(__low2half(*pscratch), &tmp);
- reducer.reduce(__high2half(*pscratch), &tmp);
- pscratch++;
- }
- *output = tmp;
- }
-
- #endif // EIGEN_HAS_GPU_FP16
-
- template <typename Self, typename Op, typename OutputType, bool PacketAccess, typename Enabled = void>
- struct FullReductionLauncher {
- static void run(const Self&, Op&, const GpuDevice&, OutputType*, typename Self::Index) {
- gpu_assert(false && "Should only be called on doubles, floats and half floats");
- }
- };
-
- // Specialization for float and double
- template <typename Self, typename Op, typename OutputType, bool PacketAccess>
- struct FullReductionLauncher<
- Self, Op, OutputType, PacketAccess,
- typename internal::enable_if<
- internal::is_same<float, OutputType>::value ||
- internal::is_same<double, OutputType>::value,
- void>::type> {
- static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs) {
-
- typedef typename Self::Index Index;
- const int block_size = 256;
- const int num_per_thread = 128;
- const int num_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
-
- unsigned int* semaphore = NULL;
- if (num_blocks > 1) {
- semaphore = device.semaphore();
- }
-
- LAUNCH_GPU_KERNEL((FullReductionKernel<block_size, num_per_thread, Self, Op, Index>),
- num_blocks, block_size, 0, device, reducer, self, num_coeffs, output, semaphore);
- }
- };
-
- #ifdef EIGEN_HAS_GPU_FP16
- template <typename Self, typename Op>
- struct FullReductionLauncher<Self, Op, Eigen::half, false> {
- static void run(const Self&, Op&, const GpuDevice&, half*, typename Self::Index) {
- gpu_assert(false && "Should not be called since there is no packet accessor");
- }
- };
-
- template <typename Self, typename Op>
- struct FullReductionLauncher<Self, Op, Eigen::half, true> {
- static void run(const Self& self, Op& reducer, const GpuDevice& device, half* output, typename Self::Index num_coeffs) {
- typedef typename Self::Index Index;
- typedef typename packet_traits<Eigen::half>::type PacketType;
-
- const int block_size = 256;
- const int num_per_thread = 128;
- const int num_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
- PacketType* scratch = static_cast<PacketType*>(device.scratchpad());
- // half2* scratch = static_cast<half2*>(device.scratchpad());
-
- if (num_blocks > 1) {
- // We initialize the output and the scratchpad outside the reduction kernel when we can't be sure that there
- // won't be race conditions between multiple thread blocks.
- LAUNCH_GPU_KERNEL((ReductionInitFullReduxKernelHalfFloat<Self, Op, Index>),
- 1, 1, 0, device, reducer, self, num_coeffs, scratch);
- }
-
- LAUNCH_GPU_KERNEL((FullReductionKernelHalfFloat<block_size, num_per_thread, Self, Op, Index>),
- num_blocks, block_size, 0, device, reducer, self, num_coeffs, output, scratch);
-
- if (num_blocks > 1) {
- LAUNCH_GPU_KERNEL((ReductionCleanupKernelHalfFloat<Op>),
- 1, 1, 0, device, reducer, output, scratch);
- }
- }
- };
- #endif // EIGEN_HAS_GPU_FP16
-
-
- template <typename Self, typename Op, bool Vectorizable>
- struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
- // Unfortunately nvidia doesn't support well exotic types such as complex,
- // so reduce the scope of the optimized version of the code to the simple cases
- // of doubles, floats and half floats
- #ifdef EIGEN_HAS_GPU_FP16
- static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
- (internal::is_same<typename Self::CoeffReturnType, float>::value ||
- internal::is_same<typename Self::CoeffReturnType, double>::value ||
- (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
- #else // EIGEN_HAS_GPU_FP16
- static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
- (internal::is_same<typename Self::CoeffReturnType, float>::value ||
- internal::is_same<typename Self::CoeffReturnType, double>::value);
- #endif // EIGEN_HAS_GPU_FP16
-
- template <typename OutputType>
- static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output) {
- gpu_assert(HasOptimizedImplementation && "Should only be called on doubles, floats or half floats");
- const Index num_coeffs = array_prod(self.m_impl.dimensions());
- // Don't crash when we're called with an input tensor of size 0.
- if (num_coeffs == 0) {
- return;
- }
-
- FullReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs);
- }
- };
-
-
- template <int NumPerThread, typename Self,
- typename Reducer, typename Index>
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void InnerReductionKernel(Reducer reducer, const Self input, Index num_coeffs_to_reduce, Index num_preserved_coeffs,
- typename Self::CoeffReturnType* output) {
- #if (defined(EIGEN_HIP_DEVICE_COMPILE) && defined(__HIP_ARCH_HAS_WARP_SHUFFLE__)) || (EIGEN_CUDA_ARCH >= 300)
- typedef typename Self::CoeffReturnType Type;
- eigen_assert(blockDim.y == 1);
- eigen_assert(blockDim.z == 1);
- eigen_assert(gridDim.y == 1);
- eigen_assert(gridDim.z == 1);
-
- const int unroll_times = 16;
- eigen_assert(NumPerThread % unroll_times == 0);
-
- const Index input_col_blocks = divup<Index>(num_coeffs_to_reduce, blockDim.x * NumPerThread);
- const Index num_input_blocks = input_col_blocks * num_preserved_coeffs;
-
- const Index num_threads = blockDim.x * gridDim.x;
- const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
-
- // Initialize the output values if they weren't initialized by the ReductionInitKernel
- if (gridDim.x == 1) {
- for (Index i = thread_id; i < num_preserved_coeffs; i += num_threads) {
- output[i] = reducer.initialize();
- }
- __syncthreads();
- }
-
- for (Index i = blockIdx.x; i < num_input_blocks; i += gridDim.x) {
- const Index row = i / input_col_blocks;
-
- if (row < num_preserved_coeffs) {
- const Index col_block = i % input_col_blocks;
- const Index col_begin = col_block * blockDim.x * NumPerThread + threadIdx.x;
-
- Type reduced_val = reducer.initialize();
-
- for (Index j = 0; j < NumPerThread; j += unroll_times) {
- const Index last_col = col_begin + blockDim.x * (j + unroll_times - 1);
- if (last_col >= num_coeffs_to_reduce) {
- for (Index col = col_begin + blockDim.x * j; col < num_coeffs_to_reduce; col += blockDim.x) {
- const Type val = input.m_impl.coeff(row * num_coeffs_to_reduce + col);
- reducer.reduce(val, &reduced_val);
- }
- break;
- } else {
- // Faster version of the loop with no branches after unrolling.
- #pragma unroll
- for (int k = 0; k < unroll_times; ++k) {
- const Index col = col_begin + blockDim.x * (j + k);
- reducer.reduce(input.m_impl.coeff(row * num_coeffs_to_reduce + col), &reduced_val);
- }
- }
- }
-
- #pragma unroll
- for (int offset = warpSize/2; offset > 0; offset /= 2) {
- #if defined(EIGEN_HIPCC)
- // use std::is_floating_point to determine the type of reduced_val
- // This is needed because when Type == double, hipcc will give a "call to __shfl_down is ambiguous" error
- // and list the float and int versions of __shfl_down as the candidate functions.
- if (std::is_floating_point<Type>::value) {
- reducer.reduce(__shfl_down(static_cast<float>(reduced_val), offset), &reduced_val);
- } else {
- reducer.reduce(__shfl_down(static_cast<int>(reduced_val), offset), &reduced_val);
- }
- #elif defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000
- reducer.reduce(__shfl_down(reduced_val, offset), &reduced_val);
- #else
- reducer.reduce(__shfl_down_sync(0xFFFFFFFF, reduced_val, offset), &reduced_val);
- #endif
- }
-
- if ((threadIdx.x & (warpSize - 1)) == 0) {
- atomicReduce(&(output[row]), reduced_val, reducer);
- }
- }
- }
- #else // EIGEN_CUDA_ARCH >= 300
- gpu_assert(0 && "Shouldn't be called on unsupported device");
- #endif // EIGEN_CUDA_ARCH >= 300
- }
-
- #ifdef EIGEN_HAS_GPU_FP16
-
- template <int NumPerThread, typename Self,
- typename Reducer, typename Index>
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void InnerReductionKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs_to_reduce, Index num_preserved_coeffs,
- half* output) {
- eigen_assert(blockDim.y == 1);
- eigen_assert(blockDim.z == 1);
- eigen_assert(gridDim.y == 1);
- eigen_assert(gridDim.z == 1);
-
- typedef typename packet_traits<Eigen::half>::type PacketType;
- const int packet_width = unpacket_traits<PacketType>::size;
- const int unroll_times = 16 / packet_width;
- eigen_assert(NumPerThread % unroll_times == 0);
- eigen_assert(unroll_times % 2 == 0);
-
- const Index input_col_blocks = divup<Index>(num_coeffs_to_reduce, blockDim.x * NumPerThread * 2);
- const Index num_input_blocks = divup<Index>(input_col_blocks * num_preserved_coeffs, 2);
-
- const Index num_threads = blockDim.x * gridDim.x;
- const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
-
- // Initialize the output values if they weren't initialized by the ReductionInitKernel
- if (gridDim.x == 1) {
- Index i = packet_width * thread_id;
- for (; i + packet_width <= num_preserved_coeffs;
- i += packet_width * num_threads) {
- PacketType* poutput = reinterpret_cast<PacketType*>(output + i);
- *poutput = reducer.template initializePacket<PacketType>();
- }
- if (i < num_preserved_coeffs) {
- output[i] = reducer.initialize();
- }
- __syncthreads();
- }
-
- for (Index i = blockIdx.x; i < num_input_blocks; i += gridDim.x) {
- const Index row = 2 * (i / input_col_blocks); // everybody takes 2 rows
-
- if (row + 1 < num_preserved_coeffs) {
- const Index col_block = i % input_col_blocks;
- const Index col_begin =
- packet_width * (col_block * blockDim.x * NumPerThread + threadIdx.x);
-
- PacketType reduced_val1 = reducer.template initializePacket<PacketType>();
- PacketType reduced_val2 = reducer.template initializePacket<PacketType>();
-
- for (Index j = 0; j < NumPerThread; j += unroll_times) {
- const Index last_col =
- col_begin + blockDim.x * (j + unroll_times - 1) * packet_width;
- if (last_col >= num_coeffs_to_reduce) {
- Index col = col_begin + blockDim.x * j;
- for (; col + packet_width <= num_coeffs_to_reduce;
- col += blockDim.x) {
- const PacketType val1 = input.m_impl.template packet<Unaligned>(
- row * num_coeffs_to_reduce + col);
- reducer.reducePacket(val1, &reduced_val1);
- const PacketType val2 = input.m_impl.template packet<Unaligned>(
- (row + 1) * num_coeffs_to_reduce + col);
- reducer.reducePacket(val2, &reduced_val2);
- }
- if (col < num_coeffs_to_reduce) {
- PacketType r1 = reducer.template initializePacket<PacketType>();
- PacketType r2 = reducer.template initializePacket<PacketType>();
- half2* hr1 = reinterpret_cast<half2*>(&r1);
- half2* hr2 = reinterpret_cast<half2*>(&r2);
- while (col + 1 < num_coeffs_to_reduce) {
- *hr1 = __halves2half2(
- input.m_impl.coeff(row * num_coeffs_to_reduce + col),
- input.m_impl.coeff(row * num_coeffs_to_reduce + col + 1));
- *hr2 = __halves2half2(
- input.m_impl.coeff((row + 1) * num_coeffs_to_reduce + col),
- input.m_impl.coeff((row + 1) * num_coeffs_to_reduce + col +
- 1));
- hr1++;
- hr2++;
- col += 2;
- }
- if (col < num_coeffs_to_reduce) {
- // Peel;
- const half last1 =
- input.m_impl.coeff(row * num_coeffs_to_reduce + col);
- *hr1 = __halves2half2(last1, reducer.initialize());
- const half last2 =
- input.m_impl.coeff((row + 1) * num_coeffs_to_reduce + col);
- *hr2 = __halves2half2(last2, reducer.initialize());
- }
- reducer.reducePacket(r1, &reduced_val1);
- reducer.reducePacket(r2, &reduced_val2);
- }
- break;
- } else {
- // Faster version of the loop with no branches after unrolling.
- #pragma unroll
- for (int k = 0; k < unroll_times; ++k) {
- const Index col = col_begin + blockDim.x * (j + k) * packet_width;
- reducer.reducePacket(input.m_impl.template packet<Unaligned>(
- row * num_coeffs_to_reduce + col),
- &reduced_val1);
- reducer.reducePacket(input.m_impl.template packet<Unaligned>(
- (row + 1) * num_coeffs_to_reduce + col),
- &reduced_val2);
- }
- }
- }
-
- #pragma unroll
- for (int offset = warpSize/2; offset > 0; offset /= 2) {
- #if defined(EIGEN_HIPCC)
- PacketType r1;
- PacketType r2;
- half2* hr1 = reinterpret_cast<half2*>(&r1);
- half2* hr2 = reinterpret_cast<half2*>(&r2);
- half2* rv1 = reinterpret_cast<half2*>(&reduced_val1);
- half2* rv2 = reinterpret_cast<half2*>(&reduced_val2);
- for (int i = 0; i < packet_width / 2; i++) {
- // FIXME : remove this workaround once we have native half/half2 support for __shfl_down
- union { int i; half2 h; } wka_in1, wka_out1;
- wka_in1.h = rv1[i];
- wka_out1.i = __shfl_down(wka_in1.i, offset, warpSize);
- hr1[i] = wka_out1.h;
-
- union { int i; half2 h; } wka_in2, wka_out2;
- wka_in2.h = rv2[i];
- wka_out2.i = __shfl_down(wka_in2.i, offset, warpSize);
- hr2[i] = wka_out2.h;
- }
- reducer.reducePacket(r1, &reduced_val1);
- reducer.reducePacket(r2, &reduced_val2);
- #elif defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000
- PacketType r1;
- PacketType r2;
- half2* hr1 = reinterpret_cast<half2*>(&r1);
- half2* hr2 = reinterpret_cast<half2*>(&r2);
- half2* rv1 = reinterpret_cast<half2*>(&reduced_val1);
- half2* rv2 = reinterpret_cast<half2*>(&reduced_val2);
- for (int i = 0; i < packet_width / 2; i++) {
- hr1[i] = __shfl_down(rv1[i], offset, warpSize);
- hr2[i] = __shfl_down(rv2[i], offset, warpSize);
- }
- reducer.reducePacket(r1, &reduced_val1);
- reducer.reducePacket(r2, &reduced_val2);
- #else
- PacketType r1;
- PacketType r2;
- half2* hr1 = reinterpret_cast<half2*>(&r1);
- half2* hr2 = reinterpret_cast<half2*>(&r2);
- half2* rr1 = reinterpret_cast<half2*>(&reduced_val1);
- half2* rr2 = reinterpret_cast<half2*>(&reduced_val2);
- for (int i = 0; i < packet_width / 2; i++) {
- hr1[i] =
- __shfl_down_sync(0xFFFFFFFF, rr1[i], (unsigned)offset, warpSize);
- hr2[i] =
- __shfl_down_sync(0xFFFFFFFF, rr2[i], (unsigned)offset, warpSize);
- }
- reducer.reducePacket(r1, &reduced_val1);
- reducer.reducePacket(r2, &reduced_val2);
-
- #endif
- }
- half2* rv1 = reinterpret_cast<half2*>(&reduced_val1);
- half2* rv2 = reinterpret_cast<half2*>(&reduced_val2);
- half2 val;
- if (packet_width > 2) {
- reducer.reducePacket(rv1[2], rv1);
- reducer.reducePacket(rv1[3], rv1 + 1);
- reducer.reducePacket(rv1[1], rv1);
- reducer.reducePacket(rv2[2], rv2);
- reducer.reducePacket(rv2[3], rv2 + 1);
- reducer.reducePacket(rv2[1], rv2);
- }
- half val1 = __low2half(*rv1);
- reducer.reduce(__high2half(*rv1), &val1);
- half val2 = __low2half(*rv2);
- reducer.reduce(__high2half(*rv2), &val2);
- val = __halves2half2(val1, val2);
- if ((threadIdx.x & (warpSize - 1)) == 0) {
- half* loc = output + row;
- atomicReduce((half2*)loc, val, reducer);
- }
- }
- }
- }
-
- #endif // EIGEN_HAS_GPU_FP16
-
- template <typename Self, typename Op, typename OutputType, bool PacketAccess, typename Enabled = void>
- struct InnerReductionLauncher {
- static EIGEN_DEVICE_FUNC bool run(const Self&, Op&, const GpuDevice&, OutputType*, typename Self::Index, typename Self::Index) {
- gpu_assert(false && "Should only be called to reduce doubles, floats and half floats on a gpu device");
- return true;
- }
- };
-
- // Specialization for float and double
- template <typename Self, typename Op, typename OutputType, bool PacketAccess>
- struct InnerReductionLauncher<
- Self, Op, OutputType, PacketAccess,
- typename internal::enable_if<
- internal::is_same<float, OutputType>::value ||
- internal::is_same<double, OutputType>::value,
- void>::type> {
- static bool run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
- typedef typename Self::Index Index;
-
- const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals;
- const int block_size = 256;
- const int num_per_thread = 128;
- const int dyn_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
- const int max_blocks = device.getNumGpuMultiProcessors() *
- device.maxGpuThreadsPerMultiProcessor() / block_size;
- const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
-
- if (num_blocks > 1) {
- // We initialize the outputs outside the reduction kernel when we can't be sure that there
- // won't be race conditions between multiple thread blocks.
- const int dyn_blocks = divup<int>(num_preserved_vals, 1024);
- const int max_blocks = device.getNumGpuMultiProcessors() *
- device.maxGpuThreadsPerMultiProcessor() / 1024;
- const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
- LAUNCH_GPU_KERNEL((ReductionInitKernel<OutputType, Index>),
- num_blocks, 1024, 0, device, reducer.initialize(),
- num_preserved_vals, output);
- }
-
- LAUNCH_GPU_KERNEL((InnerReductionKernel<num_per_thread, Self, Op, Index>),
- num_blocks, block_size, 0, device, reducer, self, num_coeffs_to_reduce, num_preserved_vals, output);
-
- return false;
- }
- };
-
- #ifdef EIGEN_HAS_GPU_FP16
- template <typename Self, typename Op>
- struct InnerReductionLauncher<Self, Op, Eigen::half, false> {
- static bool run(const Self&, Op&, const GpuDevice&, half*, typename Self::Index, typename Self::Index) {
- gpu_assert(false && "Should not be called since there is no packet accessor");
- return true;
- }
- };
-
- template <typename Self, typename Op>
- struct InnerReductionLauncher<Self, Op, Eigen::half, true> {
- static bool run(const Self& self, Op& reducer, const GpuDevice& device, half* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
- typedef typename Self::Index Index;
-
- if (num_preserved_vals % 2 != 0) {
- // Not supported yet, revert to the slower code path
- return true;
- }
-
- const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals;
- const int block_size = /*256*/128;
- const int num_per_thread = /*128*/64;
- const int dyn_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
- const int max_blocks = device.getNumGpuMultiProcessors() *
- device.maxGpuThreadsPerMultiProcessor() / block_size;
- const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
-
- if (num_blocks > 1) {
- // We initialize the outputs outside the reduction kernel when we can't be sure that there
- // won't be race conditions between multiple thread blocks.
- LAUNCH_GPU_KERNEL((ReductionInitKernelHalfFloat<Self, Op, Index>),
- 1, 1, 0, device, reducer, self, num_preserved_vals, output);
- }
-
- LAUNCH_GPU_KERNEL((InnerReductionKernelHalfFloat<num_per_thread, Self, Op, Index>),
- num_blocks, block_size, 0, device, reducer, self, num_coeffs_to_reduce, num_preserved_vals, output);
-
- return false;
- }
- };
- #endif // EIGEN_HAS_GPU_FP16
-
-
- template <typename Self, typename Op>
- struct InnerReducer<Self, Op, GpuDevice> {
- // Unfortunately nvidia doesn't support well exotic types such as complex,
- // so reduce the scope of the optimized version of the code to the simple case
- // of floats and half floats.
- #ifdef EIGEN_HAS_GPU_FP16
- static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
- (internal::is_same<typename Self::CoeffReturnType, float>::value ||
- internal::is_same<typename Self::CoeffReturnType, double>::value ||
- (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
- #else // EIGEN_HAS_GPU_FP16
- static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
- (internal::is_same<typename Self::CoeffReturnType, float>::value ||
- internal::is_same<typename Self::CoeffReturnType, double>::value);
- #endif // EIGEN_HAS_GPU_FP16
-
- template <typename OutputType>
- static bool run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
- gpu_assert(HasOptimizedImplementation && "Should only be called on doubles, floats or half floats");
- const Index num_coeffs = array_prod(self.m_impl.dimensions());
- // Don't crash when we're called with an input tensor of size 0.
- if (num_coeffs == 0) {
- return true;
- }
- // It's faster to use the usual code.
- if (num_coeffs_to_reduce <= 128) {
- return true;
- }
-
- return InnerReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs_to_reduce, num_preserved_vals);
- }
- };
-
- template <int NumPerThread, typename Self,
- typename Reducer, typename Index>
- __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void OuterReductionKernel(Reducer reducer, const Self input, Index num_coeffs_to_reduce, Index num_preserved_coeffs,
- typename Self::CoeffReturnType* output) {
- const Index num_threads = blockDim.x * gridDim.x;
- const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
- // Initialize the output values if they weren't initialized by the ReductionInitKernel
- if (gridDim.x == 1) {
- for (Index i = thread_id; i < num_preserved_coeffs; i += num_threads) {
- output[i] = reducer.initialize();
- }
- __syncthreads();
- }
-
- // Do the reduction.
- const Index max_iter = num_preserved_coeffs * divup<Index>(num_coeffs_to_reduce, NumPerThread);
- for (Index i = thread_id; i < max_iter; i += num_threads) {
- const Index input_col = i % num_preserved_coeffs;
- const Index input_row = (i / num_preserved_coeffs) * NumPerThread;
- typename Self::CoeffReturnType reduced_val = reducer.initialize();
- const Index max_row = numext::mini(input_row + NumPerThread, num_coeffs_to_reduce);
- for (Index j = input_row; j < max_row; j++) {
- typename Self::CoeffReturnType val = input.m_impl.coeff(j * num_preserved_coeffs + input_col);
- reducer.reduce(val, &reduced_val);
- }
- atomicReduce(&(output[input_col]), reduced_val, reducer);
- }
- }
-
-
- template <typename Self, typename Op>
- struct OuterReducer<Self, Op, GpuDevice> {
- // Unfortunately nvidia doesn't support well exotic types such as complex,
- // so reduce the scope of the optimized version of the code to the simple case
- // of floats.
- static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful &&
- (internal::is_same<typename Self::CoeffReturnType, float>::value ||
- internal::is_same<typename Self::CoeffReturnType, double>::value);
- template <typename Device, typename OutputType>
- static
- #if !defined(EIGEN_HIPCC)
- // FIXME : leaving this EIGEN_DEVICE_FUNC in, results in the following runtime error
- // (in the cxx11_tensor_reduction_gpu test)
- //
- // terminate called after throwing an instance of 'std::runtime_error'
- // what(): No device code available for function: _ZN5Eigen8internal20OuterReductionKernelIL...
- //
- // don't know why this happens (and why is it a runtime error instead of a compile time error)
- //
- // this will be fixed by HIP PR#457
- EIGEN_DEVICE_FUNC
- #endif
- bool run(const Self&, Op&, const Device&, OutputType*, typename Self::Index, typename Self::Index) {
- gpu_assert(false && "Should only be called to reduce doubles or floats on a gpu device");
- return true;
- }
-
- static bool run(const Self& self, Op& reducer, const GpuDevice& device, float* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
- typedef typename Self::Index Index;
-
- // It's faster to use the usual code.
- if (num_coeffs_to_reduce <= 32) {
- return true;
- }
-
- const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals;
- const int block_size = 256;
- const int num_per_thread = 16;
- const int dyn_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
- const int max_blocks = device.getNumGpuMultiProcessors() *
- device.maxGpuThreadsPerMultiProcessor() / block_size;
- const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
-
- if (num_blocks > 1) {
- // We initialize the outputs outside the reduction kernel when we can't be sure that there
- // won't be race conditions between multiple thread blocks.
- const int dyn_blocks = divup<int>(num_preserved_vals, 1024);
- const int max_blocks = device.getNumGpuMultiProcessors() *
- device.maxGpuThreadsPerMultiProcessor() / 1024;
- const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
- LAUNCH_GPU_KERNEL((ReductionInitKernel<float, Index>),
- num_blocks, 1024, 0, device, reducer.initialize(),
- num_preserved_vals, output);
- }
-
- LAUNCH_GPU_KERNEL((OuterReductionKernel<num_per_thread, Self, Op, Index>),
- num_blocks, block_size, 0, device, reducer, self, num_coeffs_to_reduce, num_preserved_vals, output);
-
- return false;
- }
- };
-
- #endif // defined(EIGEN_USE_GPU) && defined(EIGEN_GPUCC)
-
-
- } // end namespace internal
- } // end namespace Eigen
-
- #endif // EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_GPU_H