xmos-ai-tools 1.3.2.dev80__py3-none-macosx_10_15_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (395)
  1. xmos_ai_tools/__init__.py +7 -0
  2. xmos_ai_tools/io_server/__init__.py +151 -0
  3. xmos_ai_tools/runtime/__init__.py +0 -0
  4. xmos_ai_tools/runtime/buildfiles/aitoolslib.cmake +13 -0
  5. xmos_ai_tools/runtime/buildfiles/aitoolslib.make +8 -0
  6. xmos_ai_tools/runtime/include/flash_server.h +74 -0
  7. xmos_ai_tools/runtime/include/flatbuffers/allocator.h +68 -0
  8. xmos_ai_tools/runtime/include/flatbuffers/array.h +243 -0
  9. xmos_ai_tools/runtime/include/flatbuffers/base.h +474 -0
  10. xmos_ai_tools/runtime/include/flatbuffers/bfbs_generator.h +43 -0
  11. xmos_ai_tools/runtime/include/flatbuffers/buffer.h +142 -0
  12. xmos_ai_tools/runtime/include/flatbuffers/buffer_ref.h +53 -0
  13. xmos_ai_tools/runtime/include/flatbuffers/code_generators.h +235 -0
  14. xmos_ai_tools/runtime/include/flatbuffers/default_allocator.h +64 -0
  15. xmos_ai_tools/runtime/include/flatbuffers/detached_buffer.h +114 -0
  16. xmos_ai_tools/runtime/include/flatbuffers/flatbuffer_builder.h +1197 -0
  17. xmos_ai_tools/runtime/include/flatbuffers/flatbuffers.h +270 -0
  18. xmos_ai_tools/runtime/include/flatbuffers/flatc.h +111 -0
  19. xmos_ai_tools/runtime/include/flatbuffers/flexbuffers.h +1897 -0
  20. xmos_ai_tools/runtime/include/flatbuffers/grpc.h +300 -0
  21. xmos_ai_tools/runtime/include/flatbuffers/hash.h +127 -0
  22. xmos_ai_tools/runtime/include/flatbuffers/idl.h +1232 -0
  23. xmos_ai_tools/runtime/include/flatbuffers/minireflect.h +419 -0
  24. xmos_ai_tools/runtime/include/flatbuffers/pch/flatc_pch.h +39 -0
  25. xmos_ai_tools/runtime/include/flatbuffers/pch/pch.h +38 -0
  26. xmos_ai_tools/runtime/include/flatbuffers/reflection.h +502 -0
  27. xmos_ai_tools/runtime/include/flatbuffers/reflection_generated.h +1449 -0
  28. xmos_ai_tools/runtime/include/flatbuffers/registry.h +128 -0
  29. xmos_ai_tools/runtime/include/flatbuffers/stl_emulation.h +509 -0
  30. xmos_ai_tools/runtime/include/flatbuffers/string.h +64 -0
  31. xmos_ai_tools/runtime/include/flatbuffers/struct.h +53 -0
  32. xmos_ai_tools/runtime/include/flatbuffers/table.h +168 -0
  33. xmos_ai_tools/runtime/include/flatbuffers/util.h +690 -0
  34. xmos_ai_tools/runtime/include/flatbuffers/vector.h +370 -0
  35. xmos_ai_tools/runtime/include/flatbuffers/vector_downward.h +271 -0
  36. xmos_ai_tools/runtime/include/flatbuffers/verifier.h +283 -0
  37. xmos_ai_tools/runtime/include/ioserver.h +44 -0
  38. xmos_ai_tools/runtime/include/lib_nn/api/TransposeConv.h +24 -0
  39. xmos_ai_tools/runtime/include/lib_nn/api/add_int16.h +27 -0
  40. xmos_ai_tools/runtime/include/lib_nn/api/add_int16_transform.h +42 -0
  41. xmos_ai_tools/runtime/include/lib_nn/api/dequantize_int16.h +22 -0
  42. xmos_ai_tools/runtime/include/lib_nn/api/dequantize_int16_transform.h +34 -0
  43. xmos_ai_tools/runtime/include/lib_nn/api/expand_8_to_16.h +8 -0
  44. xmos_ai_tools/runtime/include/lib_nn/api/multiply_int16.h +42 -0
  45. xmos_ai_tools/runtime/include/lib_nn/api/multiply_int16_transform.h +71 -0
  46. xmos_ai_tools/runtime/include/lib_nn/api/nn_api.h +15 -0
  47. xmos_ai_tools/runtime/include/lib_nn/api/nn_bin_types.h +14 -0
  48. xmos_ai_tools/runtime/include/lib_nn/api/nn_config.h +287 -0
  49. xmos_ai_tools/runtime/include/lib_nn/api/nn_conv2d_structs.h +72 -0
  50. xmos_ai_tools/runtime/include/lib_nn/api/nn_image.h +26 -0
  51. xmos_ai_tools/runtime/include/lib_nn/api/nn_layers.h +303 -0
  52. xmos_ai_tools/runtime/include/lib_nn/api/nn_op_helper.h +132 -0
  53. xmos_ai_tools/runtime/include/lib_nn/api/nn_op_utils.h +150 -0
  54. xmos_ai_tools/runtime/include/lib_nn/api/nn_operator.h +18 -0
  55. xmos_ai_tools/runtime/include/lib_nn/api/nn_pooling.h +551 -0
  56. xmos_ai_tools/runtime/include/lib_nn/api/nn_types.h +83 -0
  57. xmos_ai_tools/runtime/include/lib_nn/api/nn_window_params.h +55 -0
  58. xmos_ai_tools/runtime/include/lib_nn/api/output_transform_fn_int16.h +54 -0
  59. xmos_ai_tools/runtime/include/lib_nn/api/output_transform_fn_int16_kernel_transform.h +37 -0
  60. xmos_ai_tools/runtime/include/lib_nn/api/output_transform_fn_int16_mappings.h +13 -0
  61. xmos_ai_tools/runtime/include/lib_nn/api/quadratic_approximation.h +82 -0
  62. xmos_ai_tools/runtime/include/lib_nn/api/quadratic_interpolation.h +23 -0
  63. xmos_ai_tools/runtime/include/lib_nn/api/quantize_int16.h +22 -0
  64. xmos_ai_tools/runtime/include/lib_nn/api/quantize_int16_transform.h +33 -0
  65. xmos_ai_tools/runtime/include/lib_nn/api/version.h +13 -0
  66. xmos_ai_tools/runtime/include/lib_nn/api/vpu_memmove_word_aligned.h +15 -0
  67. xmos_ai_tools/runtime/include/lib_nn/api/vpu_memset_256.h +55 -0
  68. xmos_ai_tools/runtime/include/lib_nn/api/vpu_sim.h +118 -0
  69. xmos_ai_tools/runtime/include/lib_nn/api/xs3_vpu.h +216 -0
  70. xmos_ai_tools/runtime/include/lib_nn/api/xs3a_registers.h +2869 -0
  71. xmos_ai_tools/runtime/include/lib_nn/src/asm/asm_constants.h +41 -0
  72. xmos_ai_tools/runtime/include/lib_nn/src/asm/window_op_plan.h +25 -0
  73. xmos_ai_tools/runtime/include/lib_tflite_micro/api/fast_flash.h +47 -0
  74. xmos_ai_tools/runtime/include/lib_tflite_micro/api/inference_engine.h +218 -0
  75. xmos_ai_tools/runtime/include/lib_tflite_micro/api/memory_parallel_transport.h +52 -0
  76. xmos_ai_tools/runtime/include/lib_tflite_micro/api/version.h +13 -0
  77. xmos_ai_tools/runtime/include/lib_tflite_micro/api/xcore_config.h +17 -0
  78. xmos_ai_tools/runtime/include/lib_tflite_micro/api/xcore_device_memory.h +62 -0
  79. xmos_ai_tools/runtime/include/lib_tflite_micro/api/xcore_shared_config.h +31 -0
  80. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/conv2d_float.h +155 -0
  81. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_common.h +19 -0
  82. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_custom_options.h +28 -0
  83. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_error_reporter.h +32 -0
  84. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_interpreter.h +49 -0
  85. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_ops.h +71 -0
  86. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_profiler.h +49 -0
  87. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_utils.h +160 -0
  88. xmos_ai_tools/runtime/include/lib_tflite_micro/src/thread_call.h +119 -0
  89. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/legacy/usb_defs.h +4 -0
  90. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/legacy/usb_device.h +4 -0
  91. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/legacy/usb_std_descriptors.h +4 -0
  92. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/legacy/usb_std_requests.h +4 -0
  93. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/xud.h +518 -0
  94. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/xud_conf_default.h +11 -0
  95. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/xud_device.h +87 -0
  96. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/xud_std_descriptors.h +191 -0
  97. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/xud_std_requests.h +120 -0
  98. xmos_ai_tools/runtime/include/lib_xud/lib_xud/src/user/XUD_USB_Defines.h +70 -0
  99. xmos_ai_tools/runtime/include/lib_xud/lib_xud/src/user/class/hid.h +23 -0
  100. xmos_ai_tools/runtime/include/lib_xud/lib_xud/src/user/class/usbaudio10.h +30 -0
  101. xmos_ai_tools/runtime/include/lib_xud/lib_xud/src/user/class/usbaudio20.h +357 -0
  102. xmos_ai_tools/runtime/include/lib_xud/lib_xud/src/user/class/usbaudiocommon.h +168 -0
  103. xmos_ai_tools/runtime/include/signal/micro/kernels/delay_flexbuffers_generated_data.h +25 -0
  104. xmos_ai_tools/runtime/include/signal/micro/kernels/energy_flexbuffers_generated_data.h +28 -0
  105. xmos_ai_tools/runtime/include/signal/micro/kernels/fft_flexbuffers_generated_data.h +37 -0
  106. xmos_ai_tools/runtime/include/signal/micro/kernels/filter_bank_flexbuffers_generated_data.h +25 -0
  107. xmos_ai_tools/runtime/include/signal/micro/kernels/filter_bank_log_flexbuffers_generated_data.h +27 -0
  108. xmos_ai_tools/runtime/include/signal/micro/kernels/filter_bank_spectral_subtraction_flexbuffers_generated_data.h +26 -0
  109. xmos_ai_tools/runtime/include/signal/micro/kernels/framer_flexbuffers_generated_data.h +25 -0
  110. xmos_ai_tools/runtime/include/signal/micro/kernels/irfft.h +31 -0
  111. xmos_ai_tools/runtime/include/signal/micro/kernels/overlap_add_flexbuffers_generated_data.h +25 -0
  112. xmos_ai_tools/runtime/include/signal/micro/kernels/pcan_flexbuffers_generated_data.h +7 -0
  113. xmos_ai_tools/runtime/include/signal/micro/kernels/rfft.h +31 -0
  114. xmos_ai_tools/runtime/include/signal/micro/kernels/stacker_flexbuffers_generated_data.h +25 -0
  115. xmos_ai_tools/runtime/include/signal/micro/kernels/window_flexbuffers_generated_data.h +25 -0
  116. xmos_ai_tools/runtime/include/signal/src/circular_buffer.h +118 -0
  117. xmos_ai_tools/runtime/include/signal/src/complex.h +29 -0
  118. xmos_ai_tools/runtime/include/signal/src/energy.h +38 -0
  119. xmos_ai_tools/runtime/include/signal/src/fft_auto_scale.h +35 -0
  120. xmos_ai_tools/runtime/include/signal/src/filter_bank.h +69 -0
  121. xmos_ai_tools/runtime/include/signal/src/filter_bank_log.h +38 -0
  122. xmos_ai_tools/runtime/include/signal/src/filter_bank_spectral_subtraction.h +73 -0
  123. xmos_ai_tools/runtime/include/signal/src/filter_bank_square_root.h +34 -0
  124. xmos_ai_tools/runtime/include/signal/src/irfft.h +84 -0
  125. xmos_ai_tools/runtime/include/signal/src/kiss_fft_wrappers/kiss_fft_common.h +49 -0
  126. xmos_ai_tools/runtime/include/signal/src/kiss_fft_wrappers/kiss_fft_float.h +31 -0
  127. xmos_ai_tools/runtime/include/signal/src/kiss_fft_wrappers/kiss_fft_int16.h +30 -0
  128. xmos_ai_tools/runtime/include/signal/src/kiss_fft_wrappers/kiss_fft_int32.h +31 -0
  129. xmos_ai_tools/runtime/include/signal/src/log.h +30 -0
  130. xmos_ai_tools/runtime/include/signal/src/max_abs.h +31 -0
  131. xmos_ai_tools/runtime/include/signal/src/msb.h +32 -0
  132. xmos_ai_tools/runtime/include/signal/src/overlap_add.h +46 -0
  133. xmos_ai_tools/runtime/include/signal/src/pcan_argc_fixed.h +41 -0
  134. xmos_ai_tools/runtime/include/signal/src/rfft.h +85 -0
  135. xmos_ai_tools/runtime/include/signal/src/square_root.h +32 -0
  136. xmos_ai_tools/runtime/include/signal/src/window.h +31 -0
  137. xmos_ai_tools/runtime/include/signal/testdata/fft_test_data.h +48 -0
  138. xmos_ai_tools/runtime/include/tensorflow/lite/array.h +156 -0
  139. xmos_ai_tools/runtime/include/tensorflow/lite/builtin_op_data.h +22 -0
  140. xmos_ai_tools/runtime/include/tensorflow/lite/builtin_ops.h +241 -0
  141. xmos_ai_tools/runtime/include/tensorflow/lite/c/builtin_op_data.h +20 -0
  142. xmos_ai_tools/runtime/include/tensorflow/lite/c/c_api_types.h +26 -0
  143. xmos_ai_tools/runtime/include/tensorflow/lite/c/common.h +30 -0
  144. xmos_ai_tools/runtime/include/tensorflow/lite/context_util.h +54 -0
  145. xmos_ai_tools/runtime/include/tensorflow/lite/core/api/error_reporter.h +72 -0
  146. xmos_ai_tools/runtime/include/tensorflow/lite/core/api/flatbuffer_conversions.h +440 -0
  147. xmos_ai_tools/runtime/include/tensorflow/lite/core/api/tensor_utils.h +28 -0
  148. xmos_ai_tools/runtime/include/tensorflow/lite/core/c/builtin_op_data.h +626 -0
  149. xmos_ai_tools/runtime/include/tensorflow/lite/core/c/c_api_types.h +178 -0
  150. xmos_ai_tools/runtime/include/tensorflow/lite/core/c/common.h +1496 -0
  151. xmos_ai_tools/runtime/include/tensorflow/lite/core/macros.h +78 -0
  152. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/bits.h +102 -0
  153. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/fft.h +50 -0
  154. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/fft_io.h +34 -0
  155. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/fft_util.h +34 -0
  156. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/filterbank.h +63 -0
  157. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/filterbank_io.h +35 -0
  158. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/filterbank_util.h +50 -0
  159. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/frontend.h +64 -0
  160. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/frontend_io.h +31 -0
  161. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/frontend_util.h +52 -0
  162. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/kiss_fft_common.h +48 -0
  163. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/kiss_fft_int16.h +33 -0
  164. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/log_lut.h +40 -0
  165. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/log_scale.h +39 -0
  166. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/log_scale_io.h +33 -0
  167. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/log_scale_util.h +45 -0
  168. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h +46 -0
  169. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_io.h +36 -0
  170. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.h +50 -0
  171. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h +47 -0
  172. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.h +57 -0
  173. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/window.h +49 -0
  174. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/window_io.h +34 -0
  175. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/window_util.h +45 -0
  176. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/common.h +1358 -0
  177. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/compatibility.h +122 -0
  178. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/cppmath.h +40 -0
  179. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/max.h +35 -0
  180. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/min.h +35 -0
  181. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/optimized/neon_check.h +20 -0
  182. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/portable_tensor.h +141 -0
  183. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/portable_tensor_utils.h +623 -0
  184. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/quantization_util.h +292 -0
  185. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/add.h +561 -0
  186. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/add_n.h +86 -0
  187. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/arg_min_max.h +88 -0
  188. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/batch_matmul.h +275 -0
  189. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h +101 -0
  190. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/binary_function.h +91 -0
  191. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/broadcast_args.h +56 -0
  192. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/broadcast_to.h +97 -0
  193. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/ceil.h +37 -0
  194. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/comparisons.h +271 -0
  195. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/concatenation.h +141 -0
  196. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/conv.h +289 -0
  197. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/cumsum.h +175 -0
  198. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/depth_to_space.h +79 -0
  199. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h +100 -0
  200. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h +319 -0
  201. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/dequantize.h +78 -0
  202. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/div.h +247 -0
  203. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/elu.h +37 -0
  204. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/exp.h +38 -0
  205. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/fill.h +38 -0
  206. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/floor.h +39 -0
  207. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/floor_div.h +35 -0
  208. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/floor_mod.h +44 -0
  209. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/fully_connected.h +323 -0
  210. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/hard_swish.h +168 -0
  211. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/add.h +250 -0
  212. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h +241 -0
  213. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h +291 -0
  214. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h +126 -0
  215. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h +67 -0
  216. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h +121 -0
  217. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h +18 -0
  218. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h +194 -0
  219. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h +264 -0
  220. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h +117 -0
  221. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h +224 -0
  222. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/l2normalization.h +90 -0
  223. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/leaky_relu.h +69 -0
  224. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/log_softmax.h +256 -0
  225. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/logistic.h +132 -0
  226. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/lstm_cell.h +422 -0
  227. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/maximum_minimum.h +64 -0
  228. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/mul.h +267 -0
  229. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/neg.h +37 -0
  230. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/pad.h +169 -0
  231. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/pooling.h +303 -0
  232. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.h +333 -0
  233. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h +244 -0
  234. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/prelu.h +111 -0
  235. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h +140 -0
  236. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/quantize.h +89 -0
  237. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/reduce.h +491 -0
  238. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/requantize.h +70 -0
  239. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/resize_bilinear.h +233 -0
  240. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h +102 -0
  241. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/round.h +51 -0
  242. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/select.h +151 -0
  243. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/slice.h +80 -0
  244. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/softmax.h +233 -0
  245. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h +109 -0
  246. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/space_to_depth.h +80 -0
  247. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/strided_slice.h +147 -0
  248. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/sub.h +465 -0
  249. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/tanh.h +129 -0
  250. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/transpose.h +203 -0
  251. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/transpose_conv.h +225 -0
  252. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/runtime_shape.h +168 -0
  253. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/strided_slice_logic.h +278 -0
  254. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/tensor_ctypes.h +42 -0
  255. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/types.h +1096 -0
  256. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/kernel_util.h +341 -0
  257. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/op_macros.h +49 -0
  258. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/padding.h +115 -0
  259. xmos_ai_tools/runtime/include/tensorflow/lite/micro/arena_allocator/ibuffer_allocator.h +100 -0
  260. xmos_ai_tools/runtime/include/tensorflow/lite/micro/arena_allocator/non_persistent_arena_buffer_allocator.h +104 -0
  261. xmos_ai_tools/runtime/include/tensorflow/lite/micro/arena_allocator/persistent_arena_buffer_allocator.h +58 -0
  262. xmos_ai_tools/runtime/include/tensorflow/lite/micro/arena_allocator/recording_single_arena_buffer_allocator.h +63 -0
  263. xmos_ai_tools/runtime/include/tensorflow/lite/micro/arena_allocator/single_arena_buffer_allocator.h +144 -0
  264. xmos_ai_tools/runtime/include/tensorflow/lite/micro/benchmarks/micro_benchmark.h +95 -0
  265. xmos_ai_tools/runtime/include/tensorflow/lite/micro/compatibility.h +32 -0
  266. xmos_ai_tools/runtime/include/tensorflow/lite/micro/cortex_m_generic/debug_log_callback.h +49 -0
  267. xmos_ai_tools/runtime/include/tensorflow/lite/micro/debug_log.h +38 -0
  268. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/micro_speech/micro_model_settings.h +37 -0
  269. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/network_tester/expected_output_data.h +47 -0
  270. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/network_tester/input_data.h +108 -0
  271. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/network_tester/network_model.h +166 -0
  272. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/person_detection/detection_responder.h +32 -0
  273. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/person_detection/image_provider.h +38 -0
  274. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/person_detection/main_functions.h +37 -0
  275. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/person_detection/model_settings.h +35 -0
  276. xmos_ai_tools/runtime/include/tensorflow/lite/micro/fake_micro_context.h +70 -0
  277. xmos_ai_tools/runtime/include/tensorflow/lite/micro/flatbuffer_utils.h +65 -0
  278. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/activation_utils.h +57 -0
  279. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/activations.h +64 -0
  280. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/add.h +78 -0
  281. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/mli_function_specializations.h +141 -0
  282. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/mli_interface.h +75 -0
  283. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/mli_slicers.h +56 -0
  284. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/mli_tf_utils.h +310 -0
  285. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.h +145 -0
  286. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.h +78 -0
  287. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/ceva/ceva_common.h +24 -0
  288. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/ceva/ceva_tflm_lib.h +613 -0
  289. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/ceva/mcps_macros.h +115 -0
  290. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/ceva/types.h +1286 -0
  291. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/circular_buffer.h +45 -0
  292. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/circular_buffer_flexbuffers_generated_data.h +22 -0
  293. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/conv.h +117 -0
  294. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/conv_test.h +94 -0
  295. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/depthwise_conv.h +80 -0
  296. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/dequantize.h +38 -0
  297. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/detection_postprocess_flexbuffers_generated_data.h +25 -0
  298. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/ethosu.h +28 -0
  299. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/fully_connected.h +112 -0
  300. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/hard_swish.h +30 -0
  301. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/kernel_runner.h +86 -0
  302. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/kernel_util.h +150 -0
  303. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/leaky_relu.h +43 -0
  304. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/logical.h +35 -0
  305. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/logistic.h +42 -0
  306. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/lstm_eval.h +541 -0
  307. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/lstm_eval_test.h +817 -0
  308. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/lstm_shared.h +150 -0
  309. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/micro_ops.h +158 -0
  310. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/micro_tensor_utils.h +56 -0
  311. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/mul.h +74 -0
  312. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/pad.h +27 -0
  313. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/pooling.h +142 -0
  314. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/prelu.h +39 -0
  315. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/quantize.h +37 -0
  316. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/reduce.h +65 -0
  317. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/reshape.h +26 -0
  318. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/softmax.h +67 -0
  319. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/strided_slice.h +40 -0
  320. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/sub.h +60 -0
  321. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/svdf.h +100 -0
  322. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/testdata/conv_test_data.h +37 -0
  323. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/testdata/lstm_test_data.h +579 -0
  324. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.h +47 -0
  325. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/hifimini/fixedpoint_utils.h +139 -0
  326. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/lstm_eval.h +216 -0
  327. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/lstm_shared.h +78 -0
  328. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa.h +38 -0
  329. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_add.h +48 -0
  330. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_conv.h +89 -0
  331. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_depthwise_conv.h +74 -0
  332. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_fully_connected.h +78 -0
  333. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_pad.h +49 -0
  334. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_pooling.h +76 -0
  335. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_reduce.h +47 -0
  336. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_reshape.h +44 -0
  337. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_softmax.h +58 -0
  338. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_svdf.h +39 -0
  339. xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_helpers.h +64 -0
  340. xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h +170 -0
  341. xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/linear_memory_planner.h +53 -0
  342. xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/memory_plan_struct.h +73 -0
  343. xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/micro_memory_planner.h +95 -0
  344. xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/non_persistent_buffer_planner_shim.h +133 -0
  345. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_allocation_info.h +138 -0
  346. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_allocator.h +351 -0
  347. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_arena_constants.h +28 -0
  348. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_common.h +38 -0
  349. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_context.h +176 -0
  350. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_graph.h +79 -0
  351. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_interpreter.h +189 -0
  352. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_interpreter_context.h +125 -0
  353. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_interpreter_graph.h +110 -0
  354. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_log.h +42 -0
  355. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_mutable_op_resolver.h +708 -0
  356. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_op_resolver.h +62 -0
  357. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_profiler.h +140 -0
  358. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_profiler_interface.h +38 -0
  359. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_resource_variable.h +89 -0
  360. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_time.h +36 -0
  361. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_utils.h +162 -0
  362. xmos_ai_tools/runtime/include/tensorflow/lite/micro/mock_micro_graph.h +60 -0
  363. xmos_ai_tools/runtime/include/tensorflow/lite/micro/python/interpreter/src/python_ops_resolver.h +21 -0
  364. xmos_ai_tools/runtime/include/tensorflow/lite/micro/python/tflite_size/src/flatbuffer_size.h +30 -0
  365. xmos_ai_tools/runtime/include/tensorflow/lite/micro/python/tflite_size/src/flatbuffer_size_wrapper.h +33 -0
  366. xmos_ai_tools/runtime/include/tensorflow/lite/micro/recording_micro_allocator.h +125 -0
  367. xmos_ai_tools/runtime/include/tensorflow/lite/micro/recording_micro_interpreter.h +69 -0
  368. xmos_ai_tools/runtime/include/tensorflow/lite/micro/system_setup.h +27 -0
  369. xmos_ai_tools/runtime/include/tensorflow/lite/micro/test_helper_custom_ops.h +49 -0
  370. xmos_ai_tools/runtime/include/tensorflow/lite/micro/test_helpers.h +334 -0
  371. xmos_ai_tools/runtime/include/tensorflow/lite/micro/testing/micro_test.h +267 -0
  372. xmos_ai_tools/runtime/include/tensorflow/lite/micro/testing/test_conv_model.h +23 -0
  373. xmos_ai_tools/runtime/include/tensorflow/lite/micro/tflite_bridge/flatbuffer_conversions_bridge.h +45 -0
  374. xmos_ai_tools/runtime/include/tensorflow/lite/micro/tflite_bridge/micro_error_reporter.h +36 -0
  375. xmos_ai_tools/runtime/include/tensorflow/lite/micro/tools/benchmarking/log_utils.h +273 -0
  376. xmos_ai_tools/runtime/include/tensorflow/lite/micro/tools/benchmarking/metrics.h +41 -0
  377. xmos_ai_tools/runtime/include/tensorflow/lite/micro/tools/benchmarking/op_resolver.h +127 -0
  378. xmos_ai_tools/runtime/include/tensorflow/lite/portable_type_to_tflitetype.h +75 -0
  379. xmos_ai_tools/runtime/include/tensorflow/lite/schema/schema_generated.h +24644 -0
  380. xmos_ai_tools/runtime/include/tensorflow/lite/schema/schema_utils.h +33 -0
  381. xmos_ai_tools/runtime/include/tile_ram_server.h +38 -0
  382. xmos_ai_tools/runtime/lib/libhost_xtflitemicro.a +0 -0
  383. xmos_ai_tools/runtime/lib/libxtflitemicro.a +0 -0
  384. xmos_ai_tools/xformer/__init__.py +60 -0
  385. xmos_ai_tools/xformer/flash.py +190 -0
  386. xmos_ai_tools/xinterpreters/__init__.py +1 -0
  387. xmos_ai_tools/xinterpreters/exceptions.py +38 -0
  388. xmos_ai_tools/xinterpreters/host_interpreter.py +652 -0
  389. xmos_ai_tools/xinterpreters/libs/macos/xtflm_python.1.0.1.dylib +0 -0
  390. xmos_ai_tools/xinterpreters/libs/macos/xtflm_python.dylib +0 -0
  391. xmos_ai_tools-1.3.2.dev80.data/data/bin/xcore-opt +0 -0
  392. xmos_ai_tools-1.3.2.dev80.dist-info/METADATA +33 -0
  393. xmos_ai_tools-1.3.2.dev80.dist-info/RECORD +395 -0
  394. xmos_ai_tools-1.3.2.dev80.dist-info/WHEEL +5 -0
  395. xmos_ai_tools-1.3.2.dev80.dist-info/top_level.txt +1 -0
@@ -0,0 +1,561 @@
1
+ /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+ #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
16
+ #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
17
+
18
+ #include <algorithm>
19
+ #include <cstddef>
20
+ #include <cstdint>
21
+ #include <type_traits>
22
+
23
+ #include "fixedpoint/fixedpoint.h"
24
+ #include "tensorflow/lite/kernels/internal/common.h"
25
+ #include "tensorflow/lite/kernels/internal/compatibility.h"
26
+
27
+ namespace tflite_micro {
28
+
29
+ namespace reference_ops {
30
+
31
+ template <typename T>
32
+ inline void Add(const ArithmeticParams& params,
33
+ const RuntimeShape& input1_shape, const T* input1_data,
34
+ const RuntimeShape& input2_shape, const T* input2_data,
35
+ const RuntimeShape& output_shape, T* output_data) {
36
+ T activation_min, activation_max;
37
+ GetActivationParams(params, &activation_min, &activation_max);
38
+
39
+ const int flat_size =
40
+ MatchingElementsSize(input1_shape, input2_shape, output_shape);
41
+ for (int i = 0; i < flat_size; ++i) {
42
+ output_data[i] = ActivationFunctionWithMinMax(
43
+ input1_data[i] + input2_data[i], activation_min, activation_max);
44
+ }
45
+ }
46
+
47
+ // Element-wise add that can often be used for inner loop of broadcast add as
48
+ // well as the non-broadcast add.
49
+
50
+ // This function is used for 8-bit as well as for 16-bit, but the accumulator
51
+ // is 32-bit for both cases. The overflow does not happen due to the
52
+ // choice of the shift (20 or 15, accordingly - see add.cc for more comments).
53
+ template <typename T>
54
+ inline void AddElementwise(int size, const ArithmeticParams& params,
55
+ const T* input1_data, const T* input2_data,
56
+ T* output_data) {
57
+ TFLITE_DCHECK_GT(params.input1_offset, -std::numeric_limits<T>::max());
58
+ TFLITE_DCHECK_GT(params.input2_offset, -std::numeric_limits<T>::max());
59
+ TFLITE_DCHECK_LT(params.input1_offset, std::numeric_limits<T>::max());
60
+ TFLITE_DCHECK_LT(params.input2_offset, std::numeric_limits<T>::max());
61
+
62
+ for (int i = 0; i < size; ++i) {
63
+ const int32_t input1_val = params.input1_offset + input1_data[i];
64
+ const int32_t input2_val = params.input2_offset + input2_data[i];
65
+ const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
66
+ const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
67
+ const int32_t scaled_input1_val =
68
+ MultiplyByQuantizedMultiplierSmallerThanOneExp(
69
+ shifted_input1_val, params.input1_multiplier, params.input1_shift);
70
+ const int32_t scaled_input2_val =
71
+ MultiplyByQuantizedMultiplierSmallerThanOneExp(
72
+ shifted_input2_val, params.input2_multiplier, params.input2_shift);
73
+ const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
74
+ const int32_t raw_output =
75
+ MultiplyByQuantizedMultiplierSmallerThanOneExp(
76
+ raw_sum, params.output_multiplier, params.output_shift) +
77
+ params.output_offset;
78
+ const int32_t clamped_output =
79
+ std::min(params.quantized_activation_max,
80
+ std::max(params.quantized_activation_min, raw_output));
81
+ output_data[i] = static_cast<T>(clamped_output);
82
+ }
83
+ }
84
+
85
+ // Scalar-broadcast add that can be used for inner loop of more general
86
+ // broadcast add, so that, for example, scalar-broadcast with batch will still
87
+ // be fast.
88
+ inline void AddScalarBroadcast(int size, const ArithmeticParams& params,
89
+ uint8_t input1_data, const uint8_t* input2_data,
90
+ uint8_t* output_data) {
91
+ TFLITE_DCHECK_GT(params.input1_offset, -256);
92
+ TFLITE_DCHECK_GT(params.input2_offset, -256);
93
+ TFLITE_DCHECK_LT(params.input1_offset, 256);
94
+ TFLITE_DCHECK_LT(params.input2_offset, 256);
95
+
96
+ const int32_t input1_val = params.input1_offset + input1_data;
97
+ const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
98
+ const int32_t scaled_input1_val =
99
+ MultiplyByQuantizedMultiplierSmallerThanOneExp(
100
+ shifted_input1_val, params.input1_multiplier, params.input1_shift);
101
+ for (int i = 0; i < size; ++i) {
102
+ const int32_t input2_val = params.input2_offset + input2_data[i];
103
+ const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
104
+ const int32_t scaled_input2_val =
105
+ MultiplyByQuantizedMultiplierSmallerThanOneExp(
106
+ shifted_input2_val, params.input2_multiplier, params.input2_shift);
107
+ const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
108
+ const int32_t raw_output =
109
+ MultiplyByQuantizedMultiplierSmallerThanOneExp(
110
+ raw_sum, params.output_multiplier, params.output_shift) +
111
+ params.output_offset;
112
+ const int32_t clamped_output =
113
+ std::min(params.quantized_activation_max,
114
+ std::max(params.quantized_activation_min, raw_output));
115
+ output_data[i] = static_cast<uint8_t>(clamped_output);
116
+ }
117
+ }
118
+
119
+ inline void Add(const ArithmeticParams& params,
120
+ const RuntimeShape& input1_shape, const uint8_t* input1_data,
121
+ const RuntimeShape& input2_shape, const uint8_t* input2_data,
122
+ const RuntimeShape& output_shape, uint8_t* output_data) {
123
+ TFLITE_DCHECK_LE(params.quantized_activation_min,
124
+ params.quantized_activation_max);
125
+ const int flat_size =
126
+ MatchingElementsSize(input1_shape, input2_shape, output_shape);
127
+
128
+ TFLITE_DCHECK_GT(params.input1_offset, -256);
129
+ TFLITE_DCHECK_GT(params.input2_offset, -256);
130
+ TFLITE_DCHECK_LT(params.input1_offset, 256);
131
+ TFLITE_DCHECK_LT(params.input2_offset, 256);
132
+ AddElementwise(flat_size, params, input1_data, input2_data, output_data);
133
+ }
134
+
135
+ inline void AddGeneralParamScale(const ArithmeticParams& params,
136
+ const RuntimeShape& input1_shape,
137
+ const int16_t* input1_data,
138
+ const RuntimeShape& input2_shape,
139
+ const int16_t* input2_data,
140
+ const RuntimeShape& output_shape,
141
+ int16_t* output_data) {
142
+ TFLITE_DCHECK_LE(params.quantized_activation_min,
143
+ params.quantized_activation_max);
144
+ const int flat_size =
145
+ MatchingElementsSize(input1_shape, input2_shape, output_shape);
146
+
147
+ int max_value = std::numeric_limits<int16_t>::max();
148
+
149
+ TFLITE_DCHECK_GT(params.input1_offset, -max_value);
150
+ TFLITE_DCHECK_GT(params.input2_offset, -max_value);
151
+ TFLITE_DCHECK_LT(params.input1_offset, max_value);
152
+ TFLITE_DCHECK_LT(params.input2_offset, max_value);
153
+ AddElementwise(flat_size, params, input1_data, input2_data, output_data);
154
+ }
155
+
156
+ inline void Add(const ArithmeticParams& params,
157
+ const RuntimeShape& input1_shape, const int16_t* input1_data,
158
+ const RuntimeShape& input2_shape, const int16_t* input2_data,
159
+ const RuntimeShape& output_shape, int16_t* output_data,
160
+ bool pot_scale = true) {
161
+ if (!pot_scale) {
162
+ AddGeneralParamScale(params, input1_shape, input1_data, input2_shape,
163
+ input2_data, output_shape, output_data);
164
+ return;
165
+ }
166
+
167
+ TFLITE_DCHECK_LE(params.quantized_activation_min,
168
+ params.quantized_activation_max);
169
+
170
+ const int input1_shift = params.input1_shift;
171
+ const int flat_size =
172
+ MatchingElementsSize(input1_shape, input2_shape, output_shape);
173
+ const int16_t output_activation_min = params.quantized_activation_min;
174
+ const int16_t output_activation_max = params.quantized_activation_max;
175
+
176
+ TFLITE_DCHECK(input1_shift == 0 || params.input2_shift == 0);
177
+ TFLITE_DCHECK_LE(input1_shift, 0);
178
+ TFLITE_DCHECK_LE(params.input2_shift, 0);
179
+ const int16_t* not_shift_input =
180
+ input1_shift == 0 ? input1_data : input2_data;
181
+ const int16_t* shift_input = input1_shift == 0 ? input2_data : input1_data;
182
+ const int input_right_shift =
183
+ input1_shift == 0 ? -params.input2_shift : -input1_shift;
184
+
185
+ for (int i = 0; i < flat_size; i++) {
186
+ // F0 uses 0 integer bits, range [-1, 1].
187
+ using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
188
+
189
+ F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
190
+ F0 scaled_input = F0::FromRaw(
191
+ gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
192
+ F0 result = gemmlowp::SaturatingAdd(scaled_input, input_ready_scaled);
193
+ const int16_t raw_output = result.raw();
194
+ const int16_t clamped_output = std::min(
195
+ output_activation_max, std::max(output_activation_min, raw_output));
196
+ output_data[i] = clamped_output;
197
+ }
198
+ }
199
+
200
+ template <typename T>
201
+ inline void AddBroadcast(const T* input_data, const T* broadcast_data,
202
+ T* output_data, size_t size, T activation_min,
203
+ T activation_max) {
204
+ for (size_t c = 0; c < size; ++c) {
205
+ output_data[c] = ActivationFunctionWithMinMax<T>(
206
+ input_data[c] + broadcast_data[0], activation_min, activation_max);
207
+ }
208
+ }
209
+
210
+ template <>
211
+ inline void AddBroadcast<int32_t>(const int32_t* input_data,
212
+ const int32_t* broadcast_data,
213
+ int32_t* output_data, size_t size,
214
+ int32_t activation_min,
215
+ int32_t activation_max) {
216
+ size_t c = 0;
217
+ #ifdef USE_NEON
218
+ const int32x4_t vmax = vdupq_n_s32(activation_max);
219
+ const int32x4_t vmin = vdupq_n_s32(activation_min);
220
+ const int32x4_t vb = vdupq_n_s32(broadcast_data[0]);
221
+ for (; c + 4 <= size; c += 4) {
222
+ const int32x4_t va = vld1q_s32(&input_data[c]);
223
+ int32x4_t vres = vaddq_s32(va, vb);
224
+ vres = vmaxq_s32(vmin, vres);
225
+ vres = vminq_s32(vmax, vres);
226
+ vst1q_s32(&output_data[c], vres);
227
+ }
228
+ #endif
229
+ for (; c < size; ++c) {
230
+ output_data[c] = ActivationFunctionWithMinMax<int32_t>(
231
+ input_data[c] + broadcast_data[0], activation_min, activation_max);
232
+ }
233
+ }
234
+
235
+ template <typename T>
236
+ void AddElementwise(const T* input1_data, const T* input2_data, T* output_data,
237
+ size_t size, T activation_min, T activation_max) {
238
+ for (size_t c = 0; c < size; ++c) {
239
+ output_data[c] = ActivationFunctionWithMinMax<T>(
240
+ input1_data[c] + input2_data[c], activation_min, activation_max);
241
+ }
242
+ }
243
+
244
+ template <>
245
+ inline void AddElementwise<int32_t>(const int32_t* input1_data,
246
+ const int32_t* input2_data,
247
+ int32_t* output_data, size_t size,
248
+ int32_t activation_min,
249
+ int32_t activation_max) {
250
+ size_t c = 0;
251
+ #ifdef USE_NEON
252
+ const int32x4_t vmax = vdupq_n_s32(activation_max);
253
+ const int32x4_t vmin = vdupq_n_s32(activation_min);
254
+ for (; c + 4 <= size; c += 4) {
255
+ const int32x4_t va = vld1q_s32(&input1_data[c]);
256
+ const int32x4_t vb = vld1q_s32(&input2_data[c]);
257
+ int32x4_t vres = vaddq_s32(va, vb);
258
+ vres = vmaxq_s32(vmin, vres);
259
+ vres = vminq_s32(vmax, vres);
260
+ vst1q_s32(&output_data[c], vres);
261
+ }
262
+ #endif
263
+ for (; c < size; ++c) {
264
+ output_data[c] = ActivationFunctionWithMinMax<int32_t>(
265
+ input1_data[c] + input2_data[c], activation_min, activation_max);
266
+ }
267
+ }
268
+
269
+ template <typename T>
270
+ inline void BroadcastAddRecursiveDimensions(
271
+ int dimension, size_t* input1_offset_p, size_t* input2_offset_p,
272
+ size_t* output_offset, size_t* compressed_input1_stride,
273
+ size_t* compressed_input2_stride, size_t* compressed_output_shape,
274
+ T activation_min, T activation_max, const T* input1_data,
275
+ const T* input2_data, T* output_data) {
276
+ if (dimension > 0) {
277
+ for (size_t c = 0; c < compressed_output_shape[dimension]; ++c) {
278
+ size_t input1_offset_c = *input1_offset_p;
279
+ size_t input2_offset_c = *input2_offset_p;
280
+ BroadcastAddRecursiveDimensions(
281
+ dimension - 1, &input1_offset_c, &input2_offset_c, output_offset,
282
+ compressed_input1_stride, compressed_input2_stride,
283
+ compressed_output_shape, activation_min, activation_max, input1_data,
284
+ input2_data, output_data);
285
+ *input1_offset_p += compressed_input1_stride[dimension];
286
+ *input2_offset_p += compressed_input2_stride[dimension];
287
+ }
288
+ } else {
289
+ TFLITE_DCHECK(dimension == 0);
290
+ bool input1_is_broadcast = compressed_input1_stride[dimension] == 0;
291
+ bool input2_is_broadcast = compressed_input2_stride[dimension] == 0;
292
+ TFLITE_DCHECK(!(input1_is_broadcast && input2_is_broadcast));
293
+ const T* input1_data_ptr = input1_data + *input1_offset_p;
294
+ const T* input2_data_ptr = input2_data + *input2_offset_p;
295
+ T* output_data_ptr = output_data + *output_offset;
296
+ if (input1_is_broadcast) {
297
+ // input1 is broadcast.
298
+ AddBroadcast<T>(input2_data_ptr, input1_data_ptr, output_data_ptr,
299
+ compressed_output_shape[dimension], activation_min,
300
+ activation_max);
301
+ *input2_offset_p += compressed_output_shape[dimension];
302
+ } else if (input2_is_broadcast) {
303
+ // input2 is broadcast.
304
+ AddBroadcast<T>(input1_data_ptr, input2_data_ptr, output_data_ptr,
305
+ compressed_output_shape[dimension], activation_min,
306
+ activation_max);
307
+ *input1_offset_p += compressed_output_shape[dimension];
308
+ } else {
309
+ // Add element-wise.
310
+ AddElementwise<T>(input1_data_ptr, input2_data_ptr, output_data_ptr,
311
+ compressed_output_shape[dimension], activation_min,
312
+ activation_max);
313
+ *input1_offset_p += compressed_output_shape[dimension];
314
+ *input2_offset_p += compressed_output_shape[dimension];
315
+ }
316
+ *output_offset += compressed_output_shape[dimension];
317
+ }
318
+ }
319
+
320
+ template <typename T,
321
+ // For unquantized add for small integers, explicitly set to true.
322
+ bool dummy = false>
323
+ inline typename std::enable_if<!is_small_integer<T>::value || dummy, void>::type
324
+ BroadcastAdd6DSlow(const ArithmeticParams& params,
325
+ const RuntimeShape& input1_shape, const T* input1_data,
326
+ const RuntimeShape& input2_shape, const T* input2_data,
327
+ const RuntimeShape& output_shape, T* output_data) {
328
+ constexpr int kMaxBroadcastDim = 6;
329
+ T activation_min, activation_max;
330
+ GetActivationParams(params, &activation_min, &activation_max);
331
+
332
+ // In Tensorflow, the dimensions are canonically named (batch_number, row,
333
+ // col, channel), with extents (batches, height, width, depth), with the
334
+ // trailing dimension changing most rapidly (channels has the smallest stride,
335
+ // typically 1 element).
336
+ //
337
+ // In generated C code, we store arrays with the dimensions reversed. The
338
+ // first dimension has smallest stride.
339
+ //
340
+ // We name our variables by their Tensorflow convention, but generate C code
341
+ // nesting loops such that the innermost loop has the smallest stride for the
342
+ // best cache behavior.
343
+ size_t compressed_input1_stride[kMaxBroadcastDim];
344
+ size_t compressed_input2_stride[kMaxBroadcastDim];
345
+ size_t compressed_output_shape[kMaxBroadcastDim];
346
+ bool broadcastable_shape = ReduceDimensionsForBroadcast<kMaxBroadcastDim>(
347
+ input1_shape, input2_shape, compressed_input1_stride,
348
+ compressed_input2_stride, compressed_output_shape);
349
+ // Skip broadcasting for degenerate shapes.
350
+ if (!broadcastable_shape) {
351
+ return;
352
+ }
353
+
354
+ size_t input1_offset = 0;
355
+ size_t input2_offset = 0;
356
+ size_t output_offset = 0;
357
+ BroadcastAddRecursiveDimensions<T>(
358
+ kMaxBroadcastDim - 1, &input1_offset, &input2_offset, &output_offset,
359
+ compressed_input1_stride, compressed_input2_stride,
360
+ compressed_output_shape, activation_min, activation_max, input1_data,
361
+ input2_data, output_data);
362
+ }
363
+
364
+ // This function is used for 8-bit as well as for 16-bit, but the accumulator
365
+ // is 32-bit for both cases. The overflow does not happen due to the
366
+ // choice of the shift (20 or 15, accordingly - see add.cc for more comments).
367
+ template <typename T>
368
+ inline void BroadcastAddRecursiveDimensions(
369
+ const ArithmeticParams& params, int dimension, size_t* input1_offset_p,
370
+ size_t* input2_offset_p, size_t* output_offset,
371
+ size_t* compressed_input1_stride, size_t* compressed_input2_stride,
372
+ size_t* compressed_output_shape, const T* input1_data, const T* input2_data,
373
+ T* output_data) {
374
+ for (size_t c = 0; c < compressed_output_shape[dimension]; ++c) {
375
+ if (dimension > 0) {
376
+ size_t input1_offset_c = *input1_offset_p;
377
+ size_t input2_offset_c = *input2_offset_p;
378
+ BroadcastAddRecursiveDimensions(
379
+ params, dimension - 1, &input1_offset_c, &input2_offset_c,
380
+ output_offset, compressed_input1_stride, compressed_input2_stride,
381
+ compressed_output_shape, input1_data, input2_data, output_data);
382
+ } else {
383
+ TFLITE_DCHECK(dimension == 0);
384
+ const int32_t input1_val =
385
+ params.input1_offset + input1_data[*input1_offset_p];
386
+ const int32_t input2_val =
387
+ params.input2_offset + input2_data[*input2_offset_p];
388
+ const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
389
+ const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
390
+ const int32_t scaled_input1_val =
391
+ MultiplyByQuantizedMultiplierSmallerThanOneExp(
392
+ shifted_input1_val, params.input1_multiplier,
393
+ params.input1_shift);
394
+ const int32_t scaled_input2_val =
395
+ MultiplyByQuantizedMultiplierSmallerThanOneExp(
396
+ shifted_input2_val, params.input2_multiplier,
397
+ params.input2_shift);
398
+ const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
399
+ const int32_t raw_output =
400
+ MultiplyByQuantizedMultiplierSmallerThanOneExp(
401
+ raw_sum, params.output_multiplier, params.output_shift) +
402
+ params.output_offset;
403
+ const int32_t clamped_output =
404
+ std::min(params.quantized_activation_max,
405
+ std::max(params.quantized_activation_min, raw_output));
406
+ output_data[*output_offset] = static_cast<T>(clamped_output);
407
+ ++(*output_offset);
408
+ }
409
+ *input1_offset_p += compressed_input1_stride[dimension];
410
+ *input2_offset_p += compressed_input2_stride[dimension];
411
+ }
412
+ }
413
+
414
+ // This function is used for 8-bit as well as for 16-bit, but the accumulator
415
+ // is 32-bit for both cases. The overflow does not happen due to the
416
+ // choice of the shift (20 or 15, accordingly - see add.cc for more comments).
417
+ template <typename T>
418
+ inline typename std::enable_if<is_small_integer<T>::value, void>::type
419
+ BroadcastAdd6DSlow(const ArithmeticParams& params,
420
+ const RuntimeShape& input1_shape, const T* input1_data,
421
+ const RuntimeShape& input2_shape, const T* input2_data,
422
+ const RuntimeShape& output_shape, T* output_data) {
423
+ constexpr int kMaxBroadcastDim = 6;
424
+
425
+ // In Tensorflow, the dimensions are canonically named (batch_number, row,
426
+ // col, channel), with extents (batches, height, width, depth), with the
427
+ // trailing dimension changing most rapidly (channels has the smallest stride,
428
+ // typically 1 element).
429
+ //
430
+ // In generated C code, we store arrays with the dimensions reversed. The
431
+ // first dimension has smallest stride.
432
+ //
433
+ // We name our variables by their Tensorflow convention, but generate C code
434
+ // nesting loops such that the innermost loop has the smallest stride for the
435
+ // best cache behavior.
436
+ size_t compressed_input1_stride[kMaxBroadcastDim];
437
+ size_t compressed_input2_stride[kMaxBroadcastDim];
438
+ size_t compressed_output_shape[kMaxBroadcastDim];
439
+ bool broadcastable_shape = ReduceDimensionsForBroadcast<kMaxBroadcastDim>(
440
+ input1_shape, input2_shape, compressed_input1_stride,
441
+ compressed_input2_stride, compressed_output_shape);
442
+ // Skip broadcasting for degenerate shapes.
443
+ if (!broadcastable_shape) {
444
+ return;
445
+ }
446
+
447
+ size_t input1_offset = 0;
448
+ size_t input2_offset = 0;
449
+ size_t output_offset = 0;
450
+ BroadcastAddRecursiveDimensions(
451
+ params, kMaxBroadcastDim - 1, &input1_offset, &input2_offset,
452
+ &output_offset, compressed_input1_stride, compressed_input2_stride,
453
+ compressed_output_shape, input1_data, input2_data, output_data);
454
+ }
455
+
456
+ template <typename T>
457
+ inline void BroadcastAdd4DSlow(
458
+ const ArithmeticParams& params, const RuntimeShape& input1_shape,
459
+ const T* input1_data, const RuntimeShape& input2_shape,
460
+ const T* input2_data, const RuntimeShape& output_shape, T* output_data) {
461
+ return BroadcastAdd6DSlow(params, input1_shape, input1_data, input2_shape,
462
+ input2_data, output_shape, output_data);
463
+ }
464
+
465
+ inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params,
466
+ const RuntimeShape& unswitched_input1_shape,
467
+ const uint8_t* unswitched_input1_data,
468
+ const RuntimeShape& unswitched_input2_shape,
469
+ const uint8_t* unswitched_input2_data,
470
+ const RuntimeShape& output_shape,
471
+ uint8_t* output_data) {
472
+ ArithmeticParams switched_params = unswitched_params;
473
+ switched_params.input1_offset = unswitched_params.input2_offset;
474
+ switched_params.input1_multiplier = unswitched_params.input2_multiplier;
475
+ switched_params.input1_shift = unswitched_params.input2_shift;
476
+ switched_params.input2_offset = unswitched_params.input1_offset;
477
+ switched_params.input2_multiplier = unswitched_params.input1_multiplier;
478
+ switched_params.input2_shift = unswitched_params.input1_shift;
479
+
480
+ const bool use_unswitched =
481
+ unswitched_params.broadcast_category ==
482
+ tflite_micro::BroadcastableOpCategory::kFirstInputBroadcastsFast;
483
+
484
+ const ArithmeticParams& params =
485
+ use_unswitched ? unswitched_params : switched_params;
486
+ const uint8_t* input1_data =
487
+ use_unswitched ? unswitched_input1_data : unswitched_input2_data;
488
+ const uint8_t* input2_data =
489
+ use_unswitched ? unswitched_input2_data : unswitched_input1_data;
490
+
491
+ // Fivefold nested loops. The second input resets its position for each
492
+ // iteration of the second loop. The first input resets its position at the
493
+ // beginning of the fourth loop. The innermost loop is an elementwise add of
494
+ // sections of the arrays.
495
+ uint8_t* output_data_ptr = output_data;
496
+ const uint8_t* input1_data_ptr = input1_data;
497
+ const uint8_t* input2_data_reset = input2_data;
498
+ // In the fivefold pattern, y0, y2 and y4 are not broadcast, and so shared
499
+ // between input shapes. y3 for input 1 is always broadcast, and so the
500
+ // dimension there is 1, whereas optionally y1 might be broadcast for input 2.
501
+ // Put another way,
502
+ // input1.shape.FlatSize = y0 * y1 * y2 * y4,
503
+ // input2.shape.FlatSize = y0 * y2 * y3 * y4.
504
+ int y0 = params.broadcast_shape[0];
505
+ int y1 = params.broadcast_shape[1];
506
+ int y2 = params.broadcast_shape[2];
507
+ int y3 = params.broadcast_shape[3];
508
+ int y4 = params.broadcast_shape[4];
509
+ if (y4 > 1) {
510
+ // General fivefold pattern, with y4 > 1 so there is a non-broadcast inner
511
+ // dimension.
512
+ for (int i0 = 0; i0 < y0; ++i0) {
513
+ const uint8_t* input2_data_ptr;
514
+ for (int i1 = 0; i1 < y1; ++i1) {
515
+ input2_data_ptr = input2_data_reset;
516
+ for (int i2 = 0; i2 < y2; ++i2) {
517
+ for (int i3 = 0; i3 < y3; ++i3) {
518
+ AddElementwise(y4, params, input1_data_ptr, input2_data_ptr,
519
+ output_data_ptr);
520
+ input2_data_ptr += y4;
521
+ output_data_ptr += y4;
522
+ }
523
+ // We have broadcast y4 of input1 data y3 times, and now move on.
524
+ input1_data_ptr += y4;
525
+ }
526
+ }
527
+ // We have broadcast y2*y3*y4 of input2 data y1 times, and now move on.
528
+ input2_data_reset = input2_data_ptr;
529
+ }
530
+ } else {
531
+ // Special case of y4 == 1, in which the innermost loop is a single element
532
+ // and can be combined with the next (y3) as an inner broadcast.
533
+ //
534
+ // Note that this handles the case of pure scalar broadcast when
535
+ // y0 == y1 == y2 == 1. With low overhead it handles cases such as scalar
536
+ // broadcast with batch (as y2 > 1).
537
+ //
538
+ // NOTE The process is the same as the above general case except simplified
539
+ // for y4 == 1 and the loop over y3 is contained within the
540
+ // AddScalarBroadcast function.
541
+ for (int i0 = 0; i0 < y0; ++i0) {
542
+ const uint8_t* input2_data_ptr;
543
+ for (int i1 = 0; i1 < y1; ++i1) {
544
+ input2_data_ptr = input2_data_reset;
545
+ for (int i2 = 0; i2 < y2; ++i2) {
546
+ AddScalarBroadcast(y3, params, *input1_data_ptr, input2_data_ptr,
547
+ output_data_ptr);
548
+ input2_data_ptr += y3;
549
+ output_data_ptr += y3;
550
+ input1_data_ptr += 1;
551
+ }
552
+ }
553
+ input2_data_reset = input2_data_ptr;
554
+ }
555
+ }
556
+ }
557
+
558
+ } // namespace reference_ops
559
+ } // namespace tflite_micro
560
+
561
+ #endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
@@ -0,0 +1,86 @@
1
+ /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+ #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_N_H_
16
+ #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_N_H_
17
+
18
+ #include <algorithm>
19
+ #include <limits>
20
+
21
+ #include "tensorflow/lite/kernels/internal/common.h"
22
+
23
+ namespace tflite_micro {
24
+ namespace reference_ops {
25
+
26
+ // T is expected to be either float or int.
27
+ template <typename T>
28
+ inline void AddN(const RuntimeShape& input_shape, const size_t num_inputs,
29
+ const T* const* input_data, T* output_data) {
30
+ // All inputs and output should have the same shape, this is checked during
31
+ // Prepare stage.
32
+ const size_t size = input_shape.FlatSize();
33
+ for (size_t i = 0; i < size; ++i) {
34
+ T x = 0;
35
+ for (size_t j = 0; j < num_inputs; ++j) {
36
+ x += input_data[j][i];
37
+ }
38
+ output_data[i] = x;
39
+ }
40
+ }
41
+
42
+ inline void AddN(const ArithmeticParams& params,
43
+ const RuntimeShape& input_shape, const size_t num_inputs,
44
+ const int8_t* const* input_data, int8_t* output_data) {
45
+ TFLITE_DCHECK_LE(params.quantized_activation_min,
46
+ params.quantized_activation_max);
47
+ // Input offset is negative input zero point. Activation tensors are
48
+ // asymmetric quantized so they span the full int8 range.
49
+ // All inputs should have same zero-point and scale, this is checked during
50
+ // Prepare stage.
51
+ TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits<int8_t>::min());
52
+ TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits<int8_t>::max());
53
+
54
+ // All inputs and output should have the same shape, this is checked during
55
+ // Prepare stage.
56
+ const size_t size = input_shape.FlatSize();
57
+ for (size_t i = 0; i < size; ++i) {
58
+ // accumulate in scaled_x before clamping to avoid overflow
59
+ const int32_t x = params.input1_offset; // x = 0
60
+ const int32_t shifted_x = x * (1 << params.left_shift);
61
+ int32_t scaled_x = MultiplyByQuantizedMultiplierSmallerThanOneExp(
62
+ shifted_x, params.input1_multiplier, params.input1_shift);
63
+
64
+ for (size_t j = 0; j < num_inputs; ++j) {
65
+ const int32_t y = params.input1_offset + input_data[j][i];
66
+ const int32_t shifted_y = y * (1 << params.left_shift);
67
+ int32_t scaled_y = MultiplyByQuantizedMultiplierSmallerThanOneExp(
68
+ shifted_y, params.input1_multiplier, params.input1_shift);
69
+ scaled_x += scaled_y;
70
+ }
71
+
72
+ const int32_t raw_output =
73
+ MultiplyByQuantizedMultiplierSmallerThanOneExp(
74
+ scaled_x, params.output_multiplier, params.output_shift) +
75
+ params.output_offset;
76
+ const int32_t clamped_output =
77
+ std::min(params.quantized_activation_max,
78
+ std::max(params.quantized_activation_min, raw_output));
79
+ output_data[i] = static_cast<int8_t>(clamped_output);
80
+ }
81
+ }
82
+
83
+ } // namespace reference_ops
84
+ } // namespace tflite_micro
85
+
86
+ #endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_N_H_