xmos-ai-tools 1.3.2.dev80__py3-none-macosx_10_15_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (395)
  1. xmos_ai_tools/__init__.py +7 -0
  2. xmos_ai_tools/io_server/__init__.py +151 -0
  3. xmos_ai_tools/runtime/__init__.py +0 -0
  4. xmos_ai_tools/runtime/buildfiles/aitoolslib.cmake +13 -0
  5. xmos_ai_tools/runtime/buildfiles/aitoolslib.make +8 -0
  6. xmos_ai_tools/runtime/include/flash_server.h +74 -0
  7. xmos_ai_tools/runtime/include/flatbuffers/allocator.h +68 -0
  8. xmos_ai_tools/runtime/include/flatbuffers/array.h +243 -0
  9. xmos_ai_tools/runtime/include/flatbuffers/base.h +474 -0
  10. xmos_ai_tools/runtime/include/flatbuffers/bfbs_generator.h +43 -0
  11. xmos_ai_tools/runtime/include/flatbuffers/buffer.h +142 -0
  12. xmos_ai_tools/runtime/include/flatbuffers/buffer_ref.h +53 -0
  13. xmos_ai_tools/runtime/include/flatbuffers/code_generators.h +235 -0
  14. xmos_ai_tools/runtime/include/flatbuffers/default_allocator.h +64 -0
  15. xmos_ai_tools/runtime/include/flatbuffers/detached_buffer.h +114 -0
  16. xmos_ai_tools/runtime/include/flatbuffers/flatbuffer_builder.h +1197 -0
  17. xmos_ai_tools/runtime/include/flatbuffers/flatbuffers.h +270 -0
  18. xmos_ai_tools/runtime/include/flatbuffers/flatc.h +111 -0
  19. xmos_ai_tools/runtime/include/flatbuffers/flexbuffers.h +1897 -0
  20. xmos_ai_tools/runtime/include/flatbuffers/grpc.h +300 -0
  21. xmos_ai_tools/runtime/include/flatbuffers/hash.h +127 -0
  22. xmos_ai_tools/runtime/include/flatbuffers/idl.h +1232 -0
  23. xmos_ai_tools/runtime/include/flatbuffers/minireflect.h +419 -0
  24. xmos_ai_tools/runtime/include/flatbuffers/pch/flatc_pch.h +39 -0
  25. xmos_ai_tools/runtime/include/flatbuffers/pch/pch.h +38 -0
  26. xmos_ai_tools/runtime/include/flatbuffers/reflection.h +502 -0
  27. xmos_ai_tools/runtime/include/flatbuffers/reflection_generated.h +1449 -0
  28. xmos_ai_tools/runtime/include/flatbuffers/registry.h +128 -0
  29. xmos_ai_tools/runtime/include/flatbuffers/stl_emulation.h +509 -0
  30. xmos_ai_tools/runtime/include/flatbuffers/string.h +64 -0
  31. xmos_ai_tools/runtime/include/flatbuffers/struct.h +53 -0
  32. xmos_ai_tools/runtime/include/flatbuffers/table.h +168 -0
  33. xmos_ai_tools/runtime/include/flatbuffers/util.h +690 -0
  34. xmos_ai_tools/runtime/include/flatbuffers/vector.h +370 -0
  35. xmos_ai_tools/runtime/include/flatbuffers/vector_downward.h +271 -0
  36. xmos_ai_tools/runtime/include/flatbuffers/verifier.h +283 -0
  37. xmos_ai_tools/runtime/include/ioserver.h +44 -0
  38. xmos_ai_tools/runtime/include/lib_nn/api/TransposeConv.h +24 -0
  39. xmos_ai_tools/runtime/include/lib_nn/api/add_int16.h +27 -0
  40. xmos_ai_tools/runtime/include/lib_nn/api/add_int16_transform.h +42 -0
  41. xmos_ai_tools/runtime/include/lib_nn/api/dequantize_int16.h +22 -0
  42. xmos_ai_tools/runtime/include/lib_nn/api/dequantize_int16_transform.h +34 -0
  43. xmos_ai_tools/runtime/include/lib_nn/api/expand_8_to_16.h +8 -0
  44. xmos_ai_tools/runtime/include/lib_nn/api/multiply_int16.h +42 -0
  45. xmos_ai_tools/runtime/include/lib_nn/api/multiply_int16_transform.h +71 -0
  46. xmos_ai_tools/runtime/include/lib_nn/api/nn_api.h +15 -0
  47. xmos_ai_tools/runtime/include/lib_nn/api/nn_bin_types.h +14 -0
  48. xmos_ai_tools/runtime/include/lib_nn/api/nn_config.h +287 -0
  49. xmos_ai_tools/runtime/include/lib_nn/api/nn_conv2d_structs.h +72 -0
  50. xmos_ai_tools/runtime/include/lib_nn/api/nn_image.h +26 -0
  51. xmos_ai_tools/runtime/include/lib_nn/api/nn_layers.h +303 -0
  52. xmos_ai_tools/runtime/include/lib_nn/api/nn_op_helper.h +132 -0
  53. xmos_ai_tools/runtime/include/lib_nn/api/nn_op_utils.h +150 -0
  54. xmos_ai_tools/runtime/include/lib_nn/api/nn_operator.h +18 -0
  55. xmos_ai_tools/runtime/include/lib_nn/api/nn_pooling.h +551 -0
  56. xmos_ai_tools/runtime/include/lib_nn/api/nn_types.h +83 -0
  57. xmos_ai_tools/runtime/include/lib_nn/api/nn_window_params.h +55 -0
  58. xmos_ai_tools/runtime/include/lib_nn/api/output_transform_fn_int16.h +54 -0
  59. xmos_ai_tools/runtime/include/lib_nn/api/output_transform_fn_int16_kernel_transform.h +37 -0
  60. xmos_ai_tools/runtime/include/lib_nn/api/output_transform_fn_int16_mappings.h +13 -0
  61. xmos_ai_tools/runtime/include/lib_nn/api/quadratic_approximation.h +82 -0
  62. xmos_ai_tools/runtime/include/lib_nn/api/quadratic_interpolation.h +23 -0
  63. xmos_ai_tools/runtime/include/lib_nn/api/quantize_int16.h +22 -0
  64. xmos_ai_tools/runtime/include/lib_nn/api/quantize_int16_transform.h +33 -0
  65. xmos_ai_tools/runtime/include/lib_nn/api/version.h +13 -0
  66. xmos_ai_tools/runtime/include/lib_nn/api/vpu_memmove_word_aligned.h +15 -0
  67. xmos_ai_tools/runtime/include/lib_nn/api/vpu_memset_256.h +55 -0
  68. xmos_ai_tools/runtime/include/lib_nn/api/vpu_sim.h +118 -0
  69. xmos_ai_tools/runtime/include/lib_nn/api/xs3_vpu.h +216 -0
  70. xmos_ai_tools/runtime/include/lib_nn/api/xs3a_registers.h +2869 -0
  71. xmos_ai_tools/runtime/include/lib_nn/src/asm/asm_constants.h +41 -0
  72. xmos_ai_tools/runtime/include/lib_nn/src/asm/window_op_plan.h +25 -0
  73. xmos_ai_tools/runtime/include/lib_tflite_micro/api/fast_flash.h +47 -0
  74. xmos_ai_tools/runtime/include/lib_tflite_micro/api/inference_engine.h +218 -0
  75. xmos_ai_tools/runtime/include/lib_tflite_micro/api/memory_parallel_transport.h +52 -0
  76. xmos_ai_tools/runtime/include/lib_tflite_micro/api/version.h +13 -0
  77. xmos_ai_tools/runtime/include/lib_tflite_micro/api/xcore_config.h +17 -0
  78. xmos_ai_tools/runtime/include/lib_tflite_micro/api/xcore_device_memory.h +62 -0
  79. xmos_ai_tools/runtime/include/lib_tflite_micro/api/xcore_shared_config.h +31 -0
  80. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/conv2d_float.h +155 -0
  81. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_common.h +19 -0
  82. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_custom_options.h +28 -0
  83. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_error_reporter.h +32 -0
  84. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_interpreter.h +49 -0
  85. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_ops.h +71 -0
  86. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_profiler.h +49 -0
  87. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_utils.h +160 -0
  88. xmos_ai_tools/runtime/include/lib_tflite_micro/src/thread_call.h +119 -0
  89. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/legacy/usb_defs.h +4 -0
  90. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/legacy/usb_device.h +4 -0
  91. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/legacy/usb_std_descriptors.h +4 -0
  92. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/legacy/usb_std_requests.h +4 -0
  93. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/xud.h +518 -0
  94. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/xud_conf_default.h +11 -0
  95. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/xud_device.h +87 -0
  96. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/xud_std_descriptors.h +191 -0
  97. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/xud_std_requests.h +120 -0
  98. xmos_ai_tools/runtime/include/lib_xud/lib_xud/src/user/XUD_USB_Defines.h +70 -0
  99. xmos_ai_tools/runtime/include/lib_xud/lib_xud/src/user/class/hid.h +23 -0
  100. xmos_ai_tools/runtime/include/lib_xud/lib_xud/src/user/class/usbaudio10.h +30 -0
  101. xmos_ai_tools/runtime/include/lib_xud/lib_xud/src/user/class/usbaudio20.h +357 -0
  102. xmos_ai_tools/runtime/include/lib_xud/lib_xud/src/user/class/usbaudiocommon.h +168 -0
  103. xmos_ai_tools/runtime/include/signal/micro/kernels/delay_flexbuffers_generated_data.h +25 -0
  104. xmos_ai_tools/runtime/include/signal/micro/kernels/energy_flexbuffers_generated_data.h +28 -0
  105. xmos_ai_tools/runtime/include/signal/micro/kernels/fft_flexbuffers_generated_data.h +37 -0
  106. xmos_ai_tools/runtime/include/signal/micro/kernels/filter_bank_flexbuffers_generated_data.h +25 -0
  107. xmos_ai_tools/runtime/include/signal/micro/kernels/filter_bank_log_flexbuffers_generated_data.h +27 -0
  108. xmos_ai_tools/runtime/include/signal/micro/kernels/filter_bank_spectral_subtraction_flexbuffers_generated_data.h +26 -0
  109. xmos_ai_tools/runtime/include/signal/micro/kernels/framer_flexbuffers_generated_data.h +25 -0
  110. xmos_ai_tools/runtime/include/signal/micro/kernels/irfft.h +31 -0
  111. xmos_ai_tools/runtime/include/signal/micro/kernels/overlap_add_flexbuffers_generated_data.h +25 -0
  112. xmos_ai_tools/runtime/include/signal/micro/kernels/pcan_flexbuffers_generated_data.h +7 -0
  113. xmos_ai_tools/runtime/include/signal/micro/kernels/rfft.h +31 -0
  114. xmos_ai_tools/runtime/include/signal/micro/kernels/stacker_flexbuffers_generated_data.h +25 -0
  115. xmos_ai_tools/runtime/include/signal/micro/kernels/window_flexbuffers_generated_data.h +25 -0
  116. xmos_ai_tools/runtime/include/signal/src/circular_buffer.h +118 -0
  117. xmos_ai_tools/runtime/include/signal/src/complex.h +29 -0
  118. xmos_ai_tools/runtime/include/signal/src/energy.h +38 -0
  119. xmos_ai_tools/runtime/include/signal/src/fft_auto_scale.h +35 -0
  120. xmos_ai_tools/runtime/include/signal/src/filter_bank.h +69 -0
  121. xmos_ai_tools/runtime/include/signal/src/filter_bank_log.h +38 -0
  122. xmos_ai_tools/runtime/include/signal/src/filter_bank_spectral_subtraction.h +73 -0
  123. xmos_ai_tools/runtime/include/signal/src/filter_bank_square_root.h +34 -0
  124. xmos_ai_tools/runtime/include/signal/src/irfft.h +84 -0
  125. xmos_ai_tools/runtime/include/signal/src/kiss_fft_wrappers/kiss_fft_common.h +49 -0
  126. xmos_ai_tools/runtime/include/signal/src/kiss_fft_wrappers/kiss_fft_float.h +31 -0
  127. xmos_ai_tools/runtime/include/signal/src/kiss_fft_wrappers/kiss_fft_int16.h +30 -0
  128. xmos_ai_tools/runtime/include/signal/src/kiss_fft_wrappers/kiss_fft_int32.h +31 -0
  129. xmos_ai_tools/runtime/include/signal/src/log.h +30 -0
  130. xmos_ai_tools/runtime/include/signal/src/max_abs.h +31 -0
  131. xmos_ai_tools/runtime/include/signal/src/msb.h +32 -0
  132. xmos_ai_tools/runtime/include/signal/src/overlap_add.h +46 -0
  133. xmos_ai_tools/runtime/include/signal/src/pcan_argc_fixed.h +41 -0
  134. xmos_ai_tools/runtime/include/signal/src/rfft.h +85 -0
  135. xmos_ai_tools/runtime/include/signal/src/square_root.h +32 -0
  136. xmos_ai_tools/runtime/include/signal/src/window.h +31 -0
  137. xmos_ai_tools/runtime/include/signal/testdata/fft_test_data.h +48 -0
  138. xmos_ai_tools/runtime/include/tensorflow/lite/array.h +156 -0
  139. xmos_ai_tools/runtime/include/tensorflow/lite/builtin_op_data.h +22 -0
  140. xmos_ai_tools/runtime/include/tensorflow/lite/builtin_ops.h +241 -0
  141. xmos_ai_tools/runtime/include/tensorflow/lite/c/builtin_op_data.h +20 -0
  142. xmos_ai_tools/runtime/include/tensorflow/lite/c/c_api_types.h +26 -0
  143. xmos_ai_tools/runtime/include/tensorflow/lite/c/common.h +30 -0
  144. xmos_ai_tools/runtime/include/tensorflow/lite/context_util.h +54 -0
  145. xmos_ai_tools/runtime/include/tensorflow/lite/core/api/error_reporter.h +72 -0
  146. xmos_ai_tools/runtime/include/tensorflow/lite/core/api/flatbuffer_conversions.h +440 -0
  147. xmos_ai_tools/runtime/include/tensorflow/lite/core/api/tensor_utils.h +28 -0
  148. xmos_ai_tools/runtime/include/tensorflow/lite/core/c/builtin_op_data.h +626 -0
  149. xmos_ai_tools/runtime/include/tensorflow/lite/core/c/c_api_types.h +178 -0
  150. xmos_ai_tools/runtime/include/tensorflow/lite/core/c/common.h +1496 -0
  151. xmos_ai_tools/runtime/include/tensorflow/lite/core/macros.h +78 -0
  152. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/bits.h +102 -0
  153. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/fft.h +50 -0
  154. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/fft_io.h +34 -0
  155. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/fft_util.h +34 -0
  156. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/filterbank.h +63 -0
  157. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/filterbank_io.h +35 -0
  158. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/filterbank_util.h +50 -0
  159. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/frontend.h +64 -0
  160. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/frontend_io.h +31 -0
  161. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/frontend_util.h +52 -0
  162. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/kiss_fft_common.h +48 -0
  163. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/kiss_fft_int16.h +33 -0
  164. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/log_lut.h +40 -0
  165. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/log_scale.h +39 -0
  166. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/log_scale_io.h +33 -0
  167. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/log_scale_util.h +45 -0
  168. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h +46 -0
  169. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_io.h +36 -0
  170. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.h +50 -0
  171. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h +47 -0
  172. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.h +57 -0
  173. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/window.h +49 -0
  174. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/window_io.h +34 -0
  175. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/window_util.h +45 -0
  176. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/common.h +1358 -0
  177. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/compatibility.h +122 -0
  178. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/cppmath.h +40 -0
  179. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/max.h +35 -0
  180. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/min.h +35 -0
  181. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/optimized/neon_check.h +20 -0
  182. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/portable_tensor.h +141 -0
  183. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/portable_tensor_utils.h +623 -0
  184. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/quantization_util.h +292 -0
  185. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/add.h +561 -0
  186. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/add_n.h +86 -0
  187. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/arg_min_max.h +88 -0
  188. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/batch_matmul.h +275 -0
  189. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h +101 -0
  190. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/binary_function.h +91 -0
  191. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/broadcast_args.h +56 -0
  192. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/broadcast_to.h +97 -0
  193. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/ceil.h +37 -0
  194. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/comparisons.h +271 -0
  195. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/concatenation.h +141 -0
  196. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/conv.h +289 -0
  197. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/cumsum.h +175 -0
  198. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/depth_to_space.h +79 -0
  199. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h +100 -0
  200. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h +319 -0
  201. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/dequantize.h +78 -0
  202. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/div.h +247 -0
  203. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/elu.h +37 -0
  204. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/exp.h +38 -0
  205. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/fill.h +38 -0
  206. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/floor.h +39 -0
  207. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/floor_div.h +35 -0
  208. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/floor_mod.h +44 -0
  209. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/fully_connected.h +323 -0
  210. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/hard_swish.h +168 -0
  211. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/add.h +250 -0
  212. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h +241 -0
  213. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h +291 -0
  214. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h +126 -0
  215. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h +67 -0
  216. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h +121 -0
  217. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h +18 -0
  218. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h +194 -0
  219. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h +264 -0
  220. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h +117 -0
  221. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h +224 -0
  222. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/l2normalization.h +90 -0
  223. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/leaky_relu.h +69 -0
  224. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/log_softmax.h +256 -0
  225. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/logistic.h +132 -0
  226. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/lstm_cell.h +422 -0
  227. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/maximum_minimum.h +64 -0
  228. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/mul.h +267 -0
  229. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/neg.h +37 -0
  230. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/pad.h +169 -0
  231. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/pooling.h +303 -0
  232. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.h +333 -0
  233. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h +244 -0
  234. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/prelu.h +111 -0
  235. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h +140 -0
  236. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/quantize.h +89 -0
  237. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/reduce.h +491 -0
  238. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/requantize.h +70 -0
  239. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/resize_bilinear.h +233 -0
  240. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h +102 -0
  241. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/round.h +51 -0
  242. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/select.h +151 -0
  243. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/slice.h +80 -0
  244. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/softmax.h +233 -0
  245. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h +109 -0
  246. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/space_to_depth.h +80 -0
  247. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/strided_slice.h +147 -0
  248. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/sub.h +465 -0
  249. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/tanh.h +129 -0
  250. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/transpose.h +203 -0
  251. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/transpose_conv.h +225 -0
  252. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/runtime_shape.h +168 -0
  253. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/strided_slice_logic.h +278 -0
  254. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/tensor_ctypes.h +42 -0
  255. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/types.h +1096 -0
  256. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/kernel_util.h +341 -0
  257. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/op_macros.h +49 -0
  258. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/padding.h +115 -0
  259. xmos_ai_tools/runtime/include/tensorflow/lite/micro/arena_allocator/ibuffer_allocator.h +100 -0
  260. xmos_ai_tools/runtime/include/tensorflow/lite/micro/arena_allocator/non_persistent_arena_buffer_allocator.h +104 -0
  261. xmos_ai_tools/runtime/include/tensorflow/lite/micro/arena_allocator/persistent_arena_buffer_allocator.h +58 -0
  262. xmos_ai_tools/runtime/include/tensorflow/lite/micro/arena_allocator/recording_single_arena_buffer_allocator.h +63 -0
  263. xmos_ai_tools/runtime/include/tensorflow/lite/micro/arena_allocator/single_arena_buffer_allocator.h +144 -0
  264. xmos_ai_tools/runtime/include/tensorflow/lite/micro/benchmarks/micro_benchmark.h +95 -0
  265. xmos_ai_tools/runtime/include/tensorflow/lite/micro/compatibility.h +32 -0
  266. xmos_ai_tools/runtime/include/tensorflow/lite/micro/cortex_m_generic/debug_log_callback.h +49 -0
  267. xmos_ai_tools/runtime/include/tensorflow/lite/micro/debug_log.h +38 -0
  268. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/micro_speech/micro_model_settings.h +37 -0
  269. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/network_tester/expected_output_data.h +47 -0
  270. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/network_tester/input_data.h +108 -0
  271. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/network_tester/network_model.h +166 -0
  272. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/person_detection/detection_responder.h +32 -0
  273. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/person_detection/image_provider.h +38 -0
  274. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/person_detection/main_functions.h +37 -0
  275. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/person_detection/model_settings.h +35 -0
  276. xmos_ai_tools/runtime/include/tensorflow/lite/micro/fake_micro_context.h +70 -0
  277. xmos_ai_tools/runtime/include/tensorflow/lite/micro/flatbuffer_utils.h +65 -0
  278. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/activation_utils.h +57 -0
  279. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/activations.h +64 -0
  280. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/add.h +78 -0
  281. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/mli_function_specializations.h +141 -0
  282. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/mli_interface.h +75 -0
  283. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/mli_slicers.h +56 -0
  284. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/mli_tf_utils.h +310 -0
  285. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.h +145 -0
  286. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.h +78 -0
  287. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/ceva/ceva_common.h +24 -0
  288. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/ceva/ceva_tflm_lib.h +613 -0
  289. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/ceva/mcps_macros.h +115 -0
  290. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/ceva/types.h +1286 -0
  291. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/circular_buffer.h +45 -0
  292. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/circular_buffer_flexbuffers_generated_data.h +22 -0
  293. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/conv.h +117 -0
  294. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/conv_test.h +94 -0
  295. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/depthwise_conv.h +80 -0
  296. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/dequantize.h +38 -0
  297. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/detection_postprocess_flexbuffers_generated_data.h +25 -0
  298. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/ethosu.h +28 -0
  299. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/fully_connected.h +112 -0
  300. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/hard_swish.h +30 -0
  301. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/kernel_runner.h +86 -0
  302. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/kernel_util.h +150 -0
  303. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/leaky_relu.h +43 -0
  304. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/logical.h +35 -0
  305. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/logistic.h +42 -0
  306. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/lstm_eval.h +541 -0
  307. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/lstm_eval_test.h +817 -0
  308. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/lstm_shared.h +150 -0
  309. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/micro_ops.h +158 -0
  310. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/micro_tensor_utils.h +56 -0
  311. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/mul.h +74 -0
  312. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/pad.h +27 -0
  313. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/pooling.h +142 -0
  314. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/prelu.h +39 -0
  315. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/quantize.h +37 -0
  316. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/reduce.h +65 -0
  317. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/reshape.h +26 -0
  318. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/softmax.h +67 -0
  319. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/strided_slice.h +40 -0
  320. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/sub.h +60 -0
  321. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/svdf.h +100 -0
  322. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/testdata/conv_test_data.h +37 -0
  323. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/testdata/lstm_test_data.h +579 -0
  324. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.h +47 -0
  325. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/hifimini/fixedpoint_utils.h +139 -0
  326. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/lstm_eval.h +216 -0
  327. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/lstm_shared.h +78 -0
  328. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa.h +38 -0
  329. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_add.h +48 -0
  330. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_conv.h +89 -0
  331. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_depthwise_conv.h +74 -0
  332. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_fully_connected.h +78 -0
  333. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_pad.h +49 -0
  334. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_pooling.h +76 -0
  335. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_reduce.h +47 -0
  336. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_reshape.h +44 -0
  337. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_softmax.h +58 -0
  338. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_svdf.h +39 -0
  339. xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_helpers.h +64 -0
  340. xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h +170 -0
  341. xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/linear_memory_planner.h +53 -0
  342. xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/memory_plan_struct.h +73 -0
  343. xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/micro_memory_planner.h +95 -0
  344. xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/non_persistent_buffer_planner_shim.h +133 -0
  345. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_allocation_info.h +138 -0
  346. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_allocator.h +351 -0
  347. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_arena_constants.h +28 -0
  348. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_common.h +38 -0
  349. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_context.h +176 -0
  350. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_graph.h +79 -0
  351. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_interpreter.h +189 -0
  352. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_interpreter_context.h +125 -0
  353. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_interpreter_graph.h +110 -0
  354. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_log.h +42 -0
  355. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_mutable_op_resolver.h +708 -0
  356. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_op_resolver.h +62 -0
  357. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_profiler.h +140 -0
  358. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_profiler_interface.h +38 -0
  359. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_resource_variable.h +89 -0
  360. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_time.h +36 -0
  361. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_utils.h +162 -0
  362. xmos_ai_tools/runtime/include/tensorflow/lite/micro/mock_micro_graph.h +60 -0
  363. xmos_ai_tools/runtime/include/tensorflow/lite/micro/python/interpreter/src/python_ops_resolver.h +21 -0
  364. xmos_ai_tools/runtime/include/tensorflow/lite/micro/python/tflite_size/src/flatbuffer_size.h +30 -0
  365. xmos_ai_tools/runtime/include/tensorflow/lite/micro/python/tflite_size/src/flatbuffer_size_wrapper.h +33 -0
  366. xmos_ai_tools/runtime/include/tensorflow/lite/micro/recording_micro_allocator.h +125 -0
  367. xmos_ai_tools/runtime/include/tensorflow/lite/micro/recording_micro_interpreter.h +69 -0
  368. xmos_ai_tools/runtime/include/tensorflow/lite/micro/system_setup.h +27 -0
  369. xmos_ai_tools/runtime/include/tensorflow/lite/micro/test_helper_custom_ops.h +49 -0
  370. xmos_ai_tools/runtime/include/tensorflow/lite/micro/test_helpers.h +334 -0
  371. xmos_ai_tools/runtime/include/tensorflow/lite/micro/testing/micro_test.h +267 -0
  372. xmos_ai_tools/runtime/include/tensorflow/lite/micro/testing/test_conv_model.h +23 -0
  373. xmos_ai_tools/runtime/include/tensorflow/lite/micro/tflite_bridge/flatbuffer_conversions_bridge.h +45 -0
  374. xmos_ai_tools/runtime/include/tensorflow/lite/micro/tflite_bridge/micro_error_reporter.h +36 -0
  375. xmos_ai_tools/runtime/include/tensorflow/lite/micro/tools/benchmarking/log_utils.h +273 -0
  376. xmos_ai_tools/runtime/include/tensorflow/lite/micro/tools/benchmarking/metrics.h +41 -0
  377. xmos_ai_tools/runtime/include/tensorflow/lite/micro/tools/benchmarking/op_resolver.h +127 -0
  378. xmos_ai_tools/runtime/include/tensorflow/lite/portable_type_to_tflitetype.h +75 -0
  379. xmos_ai_tools/runtime/include/tensorflow/lite/schema/schema_generated.h +24644 -0
  380. xmos_ai_tools/runtime/include/tensorflow/lite/schema/schema_utils.h +33 -0
  381. xmos_ai_tools/runtime/include/tile_ram_server.h +38 -0
  382. xmos_ai_tools/runtime/lib/libhost_xtflitemicro.a +0 -0
  383. xmos_ai_tools/runtime/lib/libxtflitemicro.a +0 -0
  384. xmos_ai_tools/xformer/__init__.py +60 -0
  385. xmos_ai_tools/xformer/flash.py +190 -0
  386. xmos_ai_tools/xinterpreters/__init__.py +1 -0
  387. xmos_ai_tools/xinterpreters/exceptions.py +38 -0
  388. xmos_ai_tools/xinterpreters/host_interpreter.py +652 -0
  389. xmos_ai_tools/xinterpreters/libs/macos/xtflm_python.1.0.1.dylib +0 -0
  390. xmos_ai_tools/xinterpreters/libs/macos/xtflm_python.dylib +0 -0
  391. xmos_ai_tools-1.3.2.dev80.data/data/bin/xcore-opt +0 -0
  392. xmos_ai_tools-1.3.2.dev80.dist-info/METADATA +33 -0
  393. xmos_ai_tools-1.3.2.dev80.dist-info/RECORD +395 -0
  394. xmos_ai_tools-1.3.2.dev80.dist-info/WHEEL +5 -0
  395. xmos_ai_tools-1.3.2.dev80.dist-info/top_level.txt +1 -0
@@ -0,0 +1,287 @@
1
+ // Copyright 2020-2021 XMOS LIMITED.
2
+ // This Software is subject to the terms of the XMOS Public Licence: Version 1.
3
+ #pragma once
4
+
5
+ /**
6
+ * @macro CONFIG_SYMMETRIC_SATURATION_GLOBAL
7
+ * @brief Configure whether (supported) operators use `-127` or `-128` as the
8
+ * lower saturation bound.
9
+ *
10
+ * The output of 8-bit arithmetic on the XS3 VPU has natural symmetric
11
+ * saturation bounds of (`-127`, `127`). This may be unacceptable, in which case
12
+ * (`-128`, `127`) can be used instead.
13
+ *
14
+ * If `CONFIG_SYMMETRIC_SATURATION_GLOBAL` is defined, it is used as the value
15
+ * for each config macro `CONFIG_SYMMETRIC_SATURATION_*` (e.g.
16
+ * `CONFIG_SYMMETRIC_SATURATION_requantize_16_to_8`), unless that macro has been
17
+ * explicitly set.
18
+ *
19
+ * Bypassing the symmetric saturation bound requires additional logic, and so
20
+ * will generally make the operators slower, though this will be more or less
21
+ * significant, depending on the specific operators.
22
+ */
23
+
24
+ /**
25
+ * @macro CONFIG_SYMMETRIC_SATURATION_conv2d_deep
26
+ * @brief Configure whether `-127` or `-128` is used as the saturation limit for
27
+ * `conv2d_deep()`.
28
+ *
29
+ * The output of 8-bit arithmetic on the XS3 VPU has natural symmetric
30
+ * saturation bounds of (`-127`, `127`). This may be unacceptable, in which case
31
+ * (`-128`, `127`) can be used instead.
32
+ *
33
+ * To specify that the symmetric saturation lower bound (`-127`) should be used
34
+ * for `conv2d_deep()`, define `CONFIG_SYMMETRIC_SATURATION_conv2d_deep` to be
35
+ * `1`. If it is defined to `0`, `-128` will be used instead.
36
+ *
37
+ * If `CONFIG_SYMMETRIC_SATURATION_conv2d_deep` is undefined, then the value of
38
+ * `CONFIG_SYMMETRIC_SATURATION_GLOBAL` is used instead, if that is defined. If
39
+ * neither symbol is defined, `CONFIG_SYMMETRIC_SATURATION_conv2d_deep` defaults
40
+ * to 0, using a lower saturation bound of `-128`.
41
+ *
42
+ */
43
+ #ifndef CONFIG_SYMMETRIC_SATURATION_conv2d_deep
44
+ #ifdef CONFIG_SYMMETRIC_SATURATION_GLOBAL
45
+ #define CONFIG_SYMMETRIC_SATURATION_conv2d_deep \
46
+ CONFIG_SYMMETRIC_SATURATION_GLOBAL
47
+ #else
48
+ #define CONFIG_SYMMETRIC_SATURATION_conv2d_deep (0)
49
+ #endif
50
+ #endif
51
+
52
+ /**
53
+ * @macro CONFIG_SYMMETRIC_SATURATION_conv2d_shallowin
54
+ * @brief Configure whether `-127` or `-128` is used as the saturation limit for
55
+ * `conv2d_shallowin()`.
56
+ *
57
+ * The output of 8-bit arithmetic on the XS3 VPU has natural symmetric
58
+ * saturation bounds of (`-127`, `127`). This may be unacceptable, in which case
59
+ * (`-128`, `127`) can be used instead.
60
+ *
61
+ * To specify that the symmetric saturation lower bound (`-127`) should be used
62
+ * for `conv2d_shallowin()`, define
63
+ * `CONFIG_SYMMETRIC_SATURATION_conv2d_shallowin` to be `1`. If it is defined to
64
+ * `0`, `-128` will be used instead.
65
+ *
66
+ * If `CONFIG_SYMMETRIC_SATURATION_conv2d_shallowin` is undefined, then the
67
+ * value of `CONFIG_SYMMETRIC_SATURATION_GLOBAL` is used instead, if that is
68
+ * defined. If neither symbol is defined,
69
+ * `CONFIG_SYMMETRIC_SATURATION_conv2d_shallowin` defaults to 0, using a lower
70
+ * saturation bound of `-128`.
71
+ *
72
+ */
73
+ #ifndef CONFIG_SYMMETRIC_SATURATION_conv2d_shallowin
74
+ #ifdef CONFIG_SYMMETRIC_SATURATION_GLOBAL
75
+ #define CONFIG_SYMMETRIC_SATURATION_conv2d_shallowin \
76
+ CONFIG_SYMMETRIC_SATURATION_GLOBAL
77
+ #else
78
+ #define CONFIG_SYMMETRIC_SATURATION_conv2d_shallowin (0)
79
+ #endif
80
+ #endif
81
+
82
+ /**
83
+ * @macro CONFIG_SYMMETRIC_SATURATION_conv2d_im2col
84
+ * @brief Configure whether `-127` or `-128` is used as the saturation limit for
85
+ * `conv2d_im2col()`.
86
+ *
87
+ * The output of 8-bit arithmetic on the XS3 VPU has natural symmetric
88
+ * saturation bounds of (`-127`, `127`). This may be unacceptable, in which case
89
+ * (`-128`, `127`) can be used instead.
90
+ *
91
+ * To specify that the symmetric saturation lower bound (`-127`) should be used
92
+ * for `conv2d_im2col()`, define `CONFIG_SYMMETRIC_SATURATION_conv2d_im2col`
93
+ * to be `1`. If it is defined to `0`, `-128` will be used instead.
94
+ *
95
+ * If `CONFIG_SYMMETRIC_SATURATION_conv2d_im2col` is undefined, then the value
96
+ * of `CONFIG_SYMMETRIC_SATURATION_GLOBAL` is used instead, if that is defined.
97
+ * If neither symbol is defined, `CONFIG_SYMMETRIC_SATURATION_conv2d_im2col`
98
+ * defaults to 0, using a lower saturation bound of `-128`.
99
+ *
100
+ */
101
+ #ifndef CONFIG_SYMMETRIC_SATURATION_conv2d_im2col
102
+ #ifdef CONFIG_SYMMETRIC_SATURATION_GLOBAL
103
+ #define CONFIG_SYMMETRIC_SATURATION_conv2d_im2col \
104
+ CONFIG_SYMMETRIC_SATURATION_GLOBAL
105
+ #else
106
+ #define CONFIG_SYMMETRIC_SATURATION_conv2d_im2col (0)
107
+ #endif
108
+ #endif
109
+
110
+ /**
111
+ * @macro CONFIG_SYMMETRIC_SATURATION_conv2d_depthwise
112
+ * @brief Configure whether `-127` or `-128` is used as the saturation limit for
113
+ * `conv2d_depthwise()`.
114
+ *
115
+ * The output of 8-bit arithmetic on the XS3 VPU has natural symmetric
116
+ * saturation bounds of (`-127`, `127`). This may be unacceptable, in which case
117
+ * (`-128`, `127`) can be used instead.
118
+ *
119
+ * To specify that the symmetric saturation lower bound (`-127`) should be used
120
+ * for `conv2d_depthwise()`, define
121
+ * `CONFIG_SYMMETRIC_SATURATION_conv2d_depthwise` to be `1`. If it is defined to
122
+ * `0`, `-128` will be used instead.
123
+ *
124
+ * If `CONFIG_SYMMETRIC_SATURATION_conv2d_depthwise` is undefined, then the
125
+ * value of `CONFIG_SYMMETRIC_SATURATION_GLOBAL` is used instead, if that is
126
+ * defined. If neither symbol is defined,
127
+ * `CONFIG_SYMMETRIC_SATURATION_conv2d_depthwise` defaults to 0, using a lower
128
+ * saturation bound of `-128`.
129
+ *
130
+ */
131
+ #ifndef CONFIG_SYMMETRIC_SATURATION_conv2d_depthwise
132
+ #ifdef CONFIG_SYMMETRIC_SATURATION_GLOBAL
133
+ #define CONFIG_SYMMETRIC_SATURATION_conv2d_depthwise \
134
+ CONFIG_SYMMETRIC_SATURATION_GLOBAL
135
+ #else
136
+ #define CONFIG_SYMMETRIC_SATURATION_conv2d_depthwise (0)
137
+ #endif
138
+ #endif
139
+
140
+ /**
141
+ * @macro CONFIG_SYMMETRIC_SATURATION_conv2d_1x1
142
+ * @brief Configure whether `-127` or `-128` is used as the saturation limit for
143
+ * `conv2d_1x1()`.
144
+ *
145
+ * The output of 8-bit arithmetic on the XS3 VPU has natural symmetric
146
+ * saturation bounds of (`-127`, `127`). This may be unacceptable, in which case
147
+ * (`-128`, `127`) can be used instead.
148
+ *
149
+ * To specify that the symmetric saturation lower bound (`-127`) should be used
150
+ * for `conv2d_1x1()`, define `CONFIG_SYMMETRIC_SATURATION_conv2d_1x1` to be
151
+ * `1`. If it is defined to `0`, `-128` will be used instead.
152
+ *
153
+ * If `CONFIG_SYMMETRIC_SATURATION_conv2d_1x1` is undefined, then the value of
154
+ * `CONFIG_SYMMETRIC_SATURATION_GLOBAL` is used instead, if that is defined. If
155
+ * neither symbol is defined, `CONFIG_SYMMETRIC_SATURATION_conv2d_1x1` defaults
156
+ * to 0, using a lower saturation bound of `-128`.
157
+ *
158
+ */
159
+ #ifndef CONFIG_SYMMETRIC_SATURATION_conv2d_1x1
160
+ #ifdef CONFIG_SYMMETRIC_SATURATION_GLOBAL
161
+ #define CONFIG_SYMMETRIC_SATURATION_conv2d_1x1 \
162
+ CONFIG_SYMMETRIC_SATURATION_GLOBAL
163
+ #else
164
+ #define CONFIG_SYMMETRIC_SATURATION_conv2d_1x1 (0)
165
+ #endif
166
+ #endif
167
+
168
+ /**
169
+ * @macro CONFIG_SYMMETRIC_SATURATION_avgpool2d
170
+ * @brief Configure whether `-127` or `-128` is used as the saturation limit for
171
+ * `avgpool2d()`.
172
+ *
173
+ * The output of 8-bit arithmetic on the XS3 VPU has natural symmetric
174
+ * saturation bounds of (`-127`, `127`). This may be unacceptable, in which case
175
+ * (`-128`, `127`) can be used instead.
176
+ *
177
+ * To specify that the symmetric saturation lower bound (`-127`) should be used
178
+ * for `avgpool2d()`, define `CONFIG_SYMMETRIC_SATURATION_avgpool2d` to be `1`.
179
+ * If it is defined to `0`, `-128` will be used instead.
180
+ *
181
+ * If `CONFIG_SYMMETRIC_SATURATION_avgpool2d` is undefined, then the value of
182
+ * `CONFIG_SYMMETRIC_SATURATION_GLOBAL` is used instead, if that is defined. If
183
+ * neither symbol is defined, `CONFIG_SYMMETRIC_SATURATION_avgpool2d` defaults
184
+ * to 0, using a lower saturation bound of `-128`.
185
+ *
186
+ */
187
+ #ifndef CONFIG_SYMMETRIC_SATURATION_avgpool2d
188
+ #ifdef CONFIG_SYMMETRIC_SATURATION_GLOBAL
189
+ #define CONFIG_SYMMETRIC_SATURATION_avgpool2d CONFIG_SYMMETRIC_SATURATION_GLOBAL
190
+ #else
191
+ #define CONFIG_SYMMETRIC_SATURATION_avgpool2d (0)
192
+ #endif
193
+ #endif
194
+
195
+ /**
196
+ * @macro CONFIG_SYMMETRIC_SATURATION_avgpool2d_global
197
+ * @brief Configure whether `-127` or `-128` is used as the saturation limit for
198
+ * `avgpool2d_global()`.
199
+ *
200
+ * The output of 8-bit arithmetic on the XS3 VPU has natural symmetric
201
+ * saturation bounds of (`-127`, `127`). This may be unacceptable, in which case
202
+ * (`-128`, `127`) can be used instead.
203
+ *
204
+ * To specify that the symmetric saturation lower bound (`-127`) should be used
205
+ * for `avgpool2d_global()`, define
206
+ * `CONFIG_SYMMETRIC_SATURATION_avgpool2d_global` to be `1`. If it is defined to
207
+ * `0`, `-128` will be used instead.
208
+ *
209
+ * If `CONFIG_SYMMETRIC_SATURATION_avgpool2d_global` is undefined, then the
210
+ * value of `CONFIG_SYMMETRIC_SATURATION_GLOBAL` is used instead, if that is
211
+ * defined. If neither symbol is defined,
212
+ * `CONFIG_SYMMETRIC_SATURATION_avgpool2d_global` defaults to 0, using a lower
213
+ * saturation bound of `-128`.
214
+ *
215
+ */
216
+ #ifndef CONFIG_SYMMETRIC_SATURATION_avgpool2d_global
217
+ #ifdef CONFIG_SYMMETRIC_SATURATION_GLOBAL
218
+ #define CONFIG_SYMMETRIC_SATURATION_avgpool2d_global \
219
+ CONFIG_SYMMETRIC_SATURATION_GLOBAL
220
+ #else
221
+ #define CONFIG_SYMMETRIC_SATURATION_avgpool2d_global (0)
222
+ #endif
223
+ #endif
224
+
225
+ /**
226
+ * @macro CONFIG_SYMMETRIC_SATURATION_requantize_16_to_8
227
+ * @brief Configure whether `-127` or `-128` is used as the saturation limit for
228
+ * `requantize_16_to_8()`.
229
+ *
230
+ * The output of 8-bit arithmetic on the XS3 VPU has natural symmetric
231
+ * saturation bounds of (`-127`, `127`). This may be unacceptable, in which case
232
+ * (`-128`, `127`) can be used instead.
233
+ *
234
+ * To specify that the symmetric saturation lower bound (`-127`) should be used
235
+ * for `requantize_16_to_8()`, define
236
+ * `CONFIG_SYMMETRIC_SATURATION_requantize_16_to_8` to be `1`. If it is defined
237
+ * to `0`, `-128` will be used instead.
238
+ *
239
+ * If `CONFIG_SYMMETRIC_SATURATION_requantize_16_to_8` is undefined, then the
240
+ * value of `CONFIG_SYMMETRIC_SATURATION_GLOBAL` is used instead, if that is
241
+ * defined. If neither symbol is defined,
242
+ * `CONFIG_SYMMETRIC_SATURATION_requantize_16_to_8` defaults to 0, using a lower
243
+ * saturation bound of `-128`.
244
+ *
245
+ * Unfortunately, bypassing the symmetric saturation bounds requires significant
246
+ * additional logic, and so with the symmetric saturation bound,
247
+ * `requantize_16_to_8()` is approximately 2.5x faster.
248
+ *
249
+ */
250
+ #ifndef CONFIG_SYMMETRIC_SATURATION_requantize_16_to_8
251
+ #ifdef CONFIG_SYMMETRIC_SATURATION_GLOBAL
252
+ #define CONFIG_SYMMETRIC_SATURATION_requantize_16_to_8 \
253
+ CONFIG_SYMMETRIC_SATURATION_GLOBAL
254
+ #else
255
+ #define CONFIG_SYMMETRIC_SATURATION_requantize_16_to_8 (0)
256
+ #endif
257
+ #endif
258
+
259
+ /**
260
+ * @macro CONFIG_SYMMETRIC_SATURATION_fully_connected_8
261
+ * @brief Configure whether `-127` or `-128` is used as the saturation limit for
262
+ * `fully_connected_8()`.
263
+ *
264
+ * The output of 8-bit arithmetic on the XS3 VPU has natural symmetric
265
+ * saturation bounds of (`-127`, `127`). This may be unacceptable, in which case
266
+ * (`-128`, `127`) can be used instead.
267
+ *
268
+ * To specify that the symmetric saturation lower bound (`-127`) should be used
269
+ * for `fully_connected_8()`, define
270
+ * `CONFIG_SYMMETRIC_SATURATION_fully_connected_8` to be `1`. If it is defined
271
+ * to `0`, `-128` will be used instead.
272
+ *
273
+ * If `CONFIG_SYMMETRIC_SATURATION_fully_connected_8` is undefined, then the
274
+ * value of `CONFIG_SYMMETRIC_SATURATION_GLOBAL` is used instead, if that is
275
+ * defined. If neither symbol is defined,
276
+ * `CONFIG_SYMMETRIC_SATURATION_fully_connected_8` defaults to 0, using a lower
277
+ * saturation bound of `-128`.
278
+ *
279
+ */
280
+ #ifndef CONFIG_SYMMETRIC_SATURATION_fully_connected_8
281
+ #ifdef CONFIG_SYMMETRIC_SATURATION_GLOBAL
282
+ #define CONFIG_SYMMETRIC_SATURATION_fully_connected_8 \
283
+ CONFIG_SYMMETRIC_SATURATION_GLOBAL
284
+ #else
285
+ #define CONFIG_SYMMETRIC_SATURATION_fully_connected_8 (0)
286
+ #endif
287
+ #endif
@@ -0,0 +1,72 @@
1
+ // Copyright 2020-2021 XMOS LIMITED.
2
+ // This Software is subject to the terms of the XMOS Public Licence: Version 1.
3
+ #ifndef CONV2D_STRUCTS_H_
4
+ #define CONV2D_STRUCTS_H_
5
+
6
+ #include "nn_image.h"
7
+
8
+ #define CONV2D_OUTPUT_LENGTH(input_length, filter_size, dilation, stride) \
9
+ (((input_length - (filter_size + (filter_size - 1) * (dilation - 1)) + 1) + \
10
+ stride - 1) / \
11
+ stride)
12
+
13
+ #define CONV2D_INPUT_LENGTH(output_length, filter_size, dilation, stride) \
14
+ (output_length * stride - (stride - 1) - 1 + \
15
+ (filter_size + (filter_size - 1) * (dilation - 1)))
16
+
17
+ /**
18
+ * Describes the relationship between the convolution window and the
19
+ * input image.
20
+ */
21
+ typedef struct {
22
+ /** The shape of the convolution window */
23
+ struct {
24
+ /** Height of the convolution window in pixels */
25
+ unsigned height;
26
+ /** Width of the convolution window in pixels */
27
+ unsigned width;
28
+ } shape;
29
+
30
+ /**
31
+ * The initial position of the convolution window, relative to the input
32
+ * image.
33
+ *
34
+ * The position given by this pair indicates where the top-left pixel of the
35
+ * convolution window begins relative to the top-left pixel of the input
36
+ * image.
37
+ *
38
+ * If this pair is, for example, `(0, 0)`, then the convolution window starts
39
+ * at the top left of the input image and involves no top or left padding.
40
+ */
41
+ struct {
42
+ /** Row offset of convolution window initial position */
43
+ int row;
44
+ /** Column offset of convolution window initial position */
45
+ int column;
46
+ } start;
47
+
48
+ /**
49
+ * The strides of the convolution window. These are the number of (input
50
+ * image) pixels that the convolution window moves down and right for each
51
+ * pixel moved down or right in the output image.
52
+ */
53
+ struct {
54
+ /** Vertical stride of the convolution window. */
55
+ int vertical;
56
+ /** Horizontal stride of the convolution window */
57
+ int horizontal;
58
+ } stride;
59
+
60
+ /**
61
+ * Note: Only supported where explicitly mentioned.
62
+ */
63
+ struct {
64
+ /** Vertical dilation of the convolution window. */
65
+ int vertical;
66
+ /** Horizontal dilation of the convolution window */
67
+ int horizontal;
68
+ } dilation;
69
+
70
+ } nn_window_params_t;
71
+
72
+ #endif // CONV2D_STRUCTS_H_
@@ -0,0 +1,26 @@
1
+ // Copyright 2020-2021 XMOS LIMITED.
2
+ // This Software is subject to the terms of the XMOS Public Licence: Version 1.
3
+ #ifndef IMAGE_H_
4
+ #define IMAGE_H_
5
+
6
+ #include "nn_types.h"
7
+
8
+ /**
9
+ * This struct describes the basic parameters for an image tensor
10
+ */
11
+ typedef struct {
12
+ /**
13
+ * Height of an image (in pixels)
14
+ */
15
+ uint32_t height;
16
+ /**
17
+ * Width of the image (in pixels)
18
+ */
19
+ uint32_t width;
20
+ /**
21
+ * Number of channels per pixel
22
+ */
23
+ channel_count_t channels;
24
+ } nn_image_params_t;
25
+
26
+ #endif // IMAGE_H_
@@ -0,0 +1,303 @@
1
+ // Copyright 2020-2021 XMOS LIMITED.
2
+ // This Software is subject to the terms of the XMOS Public Licence: Version 1.
3
+ #ifndef LAYERS_H_
4
+ #define LAYERS_H_
5
+ #include "nn_api.h"
6
+ #include "nn_bin_types.h"
7
+ #include "nn_image.h"
8
+ #include <string.h>
9
+
10
+ /**
11
+ * Struct represents the parameters needed by each `bsign_8()` job.
12
+ *
13
+ * Values are set by `bsign_8_prepare()`.
14
+ *
15
+ * @note This struct is intended to be opaque.
16
+ */
17
+ typedef struct {
18
+ mem_stride_t start;
19
+ int32_t length;
20
+ } nn_bsign_8_job_t;
21
+
22
+ /**
23
+ * @brief Initialize an instance of the @oper{bsign_8} operator.
24
+ *
25
+ * See @oper_ref{bsign_8} for more details about the @oper{bsign_8} operator. To
26
+ * invoke a
27
+ * @oper{bsign_8} job, call bsign_8().
28
+ *
29
+ * When bsign_8() is called, a job (`nn_bsign_8_job_t`) must be supplied to tell
30
+ * it how to do its work. This function initializes one or more jobs to be
31
+ * supplied in subsequent calls to bsign_8().
32
+ *
33
+ * Each job computes a range of elements in the output vector (possibly the
34
+ * entire vector).
35
+ *
36
+ * `jobs` points to an array of `nn_bsign_8_job_t` to be initialized. Each element
37
+ * represents one job. There should be `job_count` elements in the array.
38
+ *
39
+ * `N` is the number of elements @math{N} in the input vector @tensor{x} and
40
+ * output vector @tensor{y}.
41
+ *
42
+ * `job_count` indicates the number of jobs to be initialized (and thus the
43
+ * number of elements in the `jobs` array).
44
+ *
45
+ * Unlike many other operators, @oper{bsign_8} will automatically divide the
46
+ * work to be done as evenly as possible between jobs.
47
+ *
48
+ * @param plan [out] The plan to be initialized.
49
+ * @param jobs [out] Array of jobs to be initialized.
50
+ * @param N [in] The number of elements in the input.
51
+ * @param[in] zero_point The value @math{z_0} to be used for padding (for all
52
+ * channels)
53
+ * @param job_count [in] The number of jobs to be initialized.
54
+ */
55
+ void bsign_8_prepare(nn_bsign_8_job_t *jobs, int8_t *zero_point_vect,
56
+ const uint32_t N, const int8_t zero_point,
57
+ const int32_t job_count);
58
+
59
+ /**
60
+ * @brief Execute @oper{bsign_8} job.
61
+ *
62
+ * See @oper_ref{bsign_8} for more details about the @oper{bsign_8}
63
+ * operator.
64
+ *
65
+ * An instance of the @oper{bsign_8} operator requires a job (but no plan is
66
+ * required). See bsign_8_prepare() for more details.
67
+ *
68
+ * `Y` points to the output vector @tensor{y} with length @math{N}. The address
69
+ * supplied for `Y` should be the start address of the output vector (for any
70
+ * job being processed).
71
+ *
72
+ * `X` points to the input vector @tensor{x} with length @math{N}. The address
73
+ * supplied for `X` should be the start address of the input vector (for any job
74
+ * being processed).
75
+ *
76
+ * `job` points to the (initialized) @oper{bsign_8} job to be performed with
77
+ * this call.
78
+ *
79
+ * @requires_word_alignment{Y,X}
80
+ *
81
+ * @param Y [out] The output vector @tensor{y}
82
+ * @param X [in] The input vector @tensor{x}
83
+ * @param plan [in] The @oper{bsign_8} plan to be processed
84
+ * @param job [in] The @oper{bsign_8} job to be processed
85
+ */
86
+ void bsign_8(bnn_b32_t *Y, const int8_t *X, const int8_t *zero_point_vect,
87
+ const nn_bsign_8_job_t *job);
88
+
89
+ /**
90
+ * Struct represents the parameters needed by each `pad_run()` job.
91
+ *
92
+ * Values are set by `pad_prepare()`.
93
+ *
94
+ * @note This struct is intended to be opaque.
95
+ */
96
+ typedef struct nn_pad_plan_t {
97
+ unsigned top_pad_bytes;
98
+ unsigned mid_loop_count;
99
+ unsigned left_pad_bytes;
100
+ unsigned mid_copy_bytes;
101
+ unsigned right_pad_bytes;
102
+ unsigned bottom_pad_bytes;
103
+ } nn_pad_plan_t;
104
+
105
+ typedef struct padding_sizes_t {
106
+ int32_t top;
107
+ int32_t bottom;
108
+ int32_t left;
109
+ int32_t right;
110
+ } padding_sizes_t;
111
+
112
+ /**
113
+ * Func to calculate n_3
114
+ */
115
+ void pad_3_to_4_prepare(uint32_t *n_3, const unsigned height,
116
+ const unsigned width);
117
+
118
+ /** Function that pads an image with 3-byte values with a 0.
119
+ * The output image must be word aligned. This function solves the general
120
+ * case and calls an optimised assembly version for the bulk copy.
121
+ *
122
+ * @param outputs output values, every word contains 3 bytes and a zero
123
+ * @param inputs input values, RGBRGBRGBRGB...
124
+ * @param N_3 number of blocks of 3 bytes to copy
125
+ *
126
+ * @returns Nothing (the function is declared `void`).
127
+ */
128
+ extern void pad_3_to_4_run(int8_t outputs[], int8_t inputs[], uint32_t N_3,
129
+ uint32_t pad_val);
130
+ extern void pad_3_to_4_ref(int8_t outputs[], int8_t inputs[], uint32_t N_3,
131
+ uint32_t pad_val);
132
+
133
+ typedef struct nn_mul_params_t {
134
+ int8_t in1_zero_point;
135
+ int8_t in2_zero_point;
136
+ int16_t bias;
137
+ int16_t scalar;
138
+ int16_t vlashr_shr;
139
+ } nn_mul_params_t;
140
+
141
+ void mul_boggle(nn_mul_params_t *params, double in1Scale, double in2Scale,
142
+ double outputScale, int8_t in1ZeroPoint, int8_t in2ZeroPoint,
143
+ int8_t outputZeroPoint);
144
+ void mul_elementwise(const int8_t *in1_data, const int8_t *in2_data,
145
+ int element_count, nn_mul_params_t *params,
146
+ int8_t *out_data);
147
+
148
+ // /**
149
+ // * Describes the parameters needed for an @oper{add_elementwise} operator.
150
+ // @see add_elementwise().
151
+ // */
152
+ // typedef struct {
153
+ // /**
154
+ // * The parameters that are applied to each input element.
155
+ // */
156
+ //
157
+ // /**
158
+ // * `m1` and `m2` are the multipliers for the inputs.
159
+ // */
160
+ // int16_t m1[16];
161
+ // int16_t m2[16];
162
+
163
+ // /**
164
+ // * `shift` is the number of bits the 32-bit accumulator is
165
+ // * right-shifted by to obtain a final result for each element.
166
+ // */
167
+ // int16_t shift[16];
168
+
169
+ // /**
170
+ // * `bias_hi` and `bias_lo` together form the 32-bit bias to
171
+ // * which the scaled inputs are added.
172
+ // */
173
+ // int16_t bias_lo[16];
174
+ // int16_t bias_hi[16];
175
+
176
+ // } nn_add_params_t;
177
+
178
+ typedef struct {
179
+ int16_t m1[16];
180
+ int16_t m2[16];
181
+ int16_t shift[16];
182
+ int16_t bias_hi[16];
183
+ int16_t bias_lo[16];
184
+ } nn_add_params_t;
185
+
186
+ /**
187
+ * @brief Invoke an @oper{add_elementwise} job.
188
+ *
189
+ * The @oper{add_elementwise} operator adds together two quantized 8-bit input
190
+ * vectors, @tensor{x_0} and @tensor{x_1} element-by-element to produce the
191
+ * output vector @tensor{y}. This function assumes that the input vectors and
192
+ * the output vector each require different quantization parameters.
193
+ *
194
+ * In order to add together two quantized vectors, their quantization parameters
195
+ * must match. The contents of `params` indicate how to do this.
196
+ *
197
+ * @par Parameter Details
198
+ *
199
+ * `Y` points to the output vector @tensor{y} with shape @tensor_shape{N}.
200
+ *
201
+ * `X0` and `X1` respectively point to the first and second input vectors
202
+ * @tensor{x_0} and @tensor{x_1}, each with shape
203
+ * @tensor_shape{N}.
204
+ *
205
+ * `params` describes the parameters @math{s_i}, @math{m_i}, @math{b} and
206
+ * @math{s_{out}} which are applied for each output element.
207
+ *
208
+ * `elm_start` and `elm_count` together specify which output elements
209
+ * @math{y[k]} should be calculated by this invocation. Specifically, this
210
+ * invocation will calculate @math{y[k]} for which `elm_start` @math{\le k \lt}
211
+ * `(elm_start + elm_count)`.
212
+ *
213
+ * @param[out] Y The output vector @tensor{y}
214
+ * @param[in] X0 The first input vector @tensor{x_0}
215
+ * @param[in] X1 The second input vector @tensor{x_1}
216
+ * @param[in] params The scaling and bias parameters
217
+ * @param[in] elm_start Index of first output element to be computed
218
+ * @param[in] elm_count Number of output elements to be computed
219
+ */
220
+ void add_elementwise(int8_t Y[], const int8_t X1[], const int8_t X2[],
221
+ nn_add_params_t *p, const int elm_start,
222
+ const int elm_count);
223
+
224
+ /**
225
+ * @brief Execute @oper{lookup8} job.
226
+ *
227
+ * See @oper_ref{lookup8} for more details about the @oper{lookup8} operator.
228
+ *
229
+ * Unlike other operators, instances of @oper{lookup8} do not require plans or
230
+ * jobs and no initialization is necessary.
231
+ *
232
+ * `Y` points to the output vector @tensor{y} with length @math{N}.
233
+ *
234
+ * `X` points to the input vector @tensor{x} with length @math{N}.
235
+ *
236
+ * `lut` points to the look-up table @math{T} with shape @tensor_shape{256} and
237
+ * dtype `int8`.
238
+ *
239
+ * `N` is the length @math{N} of the input vector @tensor{x}.
240
+ *
241
+ * @requires_word_alignment{Y,X}
242
+ *
243
+ * @param Y [out] The output vector @tensor{y}
244
+ * @param X [in] The input vector @tensor{x}
245
+ * @param lut [in] Look-up table @tensor{T}
246
+ * @param N [in] Length @math{N} of input and output vectors
247
+ */
248
+ void lookup8(uint8_t *Y, const uint8_t *X, const uint8_t *lut,
249
+ const unsigned elm_start, const unsigned elm_count);
250
+
251
+ /**
252
+ * @brief Execute @oper{softmax_exp_sum} job.
253
+ *
254
+ * `Y` points to the output scalar.
255
+ *
256
+ * `X` points to the input vector @tensor{x} with length @math{N}.
257
+ *
258
+ * `lut` points to the look-up table @math{T} with shape @tensor_shape{256} and
259
+ * dtype `float32`.
260
+ *
261
+ * `N` is the length @math{N} of the input vector @tensor{x}.
262
+ *
263
+ * `elm_start` and `elm_count` together specify which output elements should be
264
+ * summed into the output scalar.
265
+ */
266
+ void softmax_exp_sum(float *Y, const int8_t *X, const float *lut,
267
+ const unsigned elm_start, const unsigned elm_count);
268
+
269
+ /**
270
+ * @brief Execute @oper{softmax_exp_div} job.
271
+ *
272
+ * `Y` points to the output vector @tensor{y} with length @math{N}.
273
+ *
274
+ * `X` points to the input vector @tensor{x} with length @math{N}.
275
+ *
276
+ * `lut` points to the look-up table @math{T} with shape @tensor_shape{256} and
277
+ * dtype `float32`.
278
+ *
279
+ * `inv_sum` is the reciprocal of the sum of the exponentials of the inputs.
280
+ *
281
+ * `elm_start` and `elm_count` together specify which output elements should be
282
+ * calculated by this invocation.
283
+ */
284
+ void softmax_exp_div(int8_t *Y, const int8_t *X, const float *lut,
285
+ const float inv_sum, const unsigned elm_start,
286
+ const unsigned elm_count);
287
+
288
+ void softmax_calculate_inv_sum(float *inv_sum, const float sums[]);
289
+
290
+ void softmax_generate_exp_lut(int zero_point, float scale, float *lut);
291
+
292
+ void softmax_ref(int8_t *Y, const int8_t *X, const float zero_point,
293
+ const float scale, const int length);
294
+
295
+ void softmax_single(int8_t *Y, const int8_t *X, const float *lut,
296
+ const int offset);
297
+
298
+ void mean_int8(const int8_t *input, int8_t *output, const int start_dim_size,
299
+ const int mean_dim_size, const int end_dim_size,
300
+ const float in_zero_point, const float out_zero_point,
301
+ const float scale_mul);
302
+
303
+ #endif // LAYERS_H_