xmos-ai-tools 1.3.2.dev80__py3-none-macosx_10_15_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (395) hide show
  1. xmos_ai_tools/__init__.py +7 -0
  2. xmos_ai_tools/io_server/__init__.py +151 -0
  3. xmos_ai_tools/runtime/__init__.py +0 -0
  4. xmos_ai_tools/runtime/buildfiles/aitoolslib.cmake +13 -0
  5. xmos_ai_tools/runtime/buildfiles/aitoolslib.make +8 -0
  6. xmos_ai_tools/runtime/include/flash_server.h +74 -0
  7. xmos_ai_tools/runtime/include/flatbuffers/allocator.h +68 -0
  8. xmos_ai_tools/runtime/include/flatbuffers/array.h +243 -0
  9. xmos_ai_tools/runtime/include/flatbuffers/base.h +474 -0
  10. xmos_ai_tools/runtime/include/flatbuffers/bfbs_generator.h +43 -0
  11. xmos_ai_tools/runtime/include/flatbuffers/buffer.h +142 -0
  12. xmos_ai_tools/runtime/include/flatbuffers/buffer_ref.h +53 -0
  13. xmos_ai_tools/runtime/include/flatbuffers/code_generators.h +235 -0
  14. xmos_ai_tools/runtime/include/flatbuffers/default_allocator.h +64 -0
  15. xmos_ai_tools/runtime/include/flatbuffers/detached_buffer.h +114 -0
  16. xmos_ai_tools/runtime/include/flatbuffers/flatbuffer_builder.h +1197 -0
  17. xmos_ai_tools/runtime/include/flatbuffers/flatbuffers.h +270 -0
  18. xmos_ai_tools/runtime/include/flatbuffers/flatc.h +111 -0
  19. xmos_ai_tools/runtime/include/flatbuffers/flexbuffers.h +1897 -0
  20. xmos_ai_tools/runtime/include/flatbuffers/grpc.h +300 -0
  21. xmos_ai_tools/runtime/include/flatbuffers/hash.h +127 -0
  22. xmos_ai_tools/runtime/include/flatbuffers/idl.h +1232 -0
  23. xmos_ai_tools/runtime/include/flatbuffers/minireflect.h +419 -0
  24. xmos_ai_tools/runtime/include/flatbuffers/pch/flatc_pch.h +39 -0
  25. xmos_ai_tools/runtime/include/flatbuffers/pch/pch.h +38 -0
  26. xmos_ai_tools/runtime/include/flatbuffers/reflection.h +502 -0
  27. xmos_ai_tools/runtime/include/flatbuffers/reflection_generated.h +1449 -0
  28. xmos_ai_tools/runtime/include/flatbuffers/registry.h +128 -0
  29. xmos_ai_tools/runtime/include/flatbuffers/stl_emulation.h +509 -0
  30. xmos_ai_tools/runtime/include/flatbuffers/string.h +64 -0
  31. xmos_ai_tools/runtime/include/flatbuffers/struct.h +53 -0
  32. xmos_ai_tools/runtime/include/flatbuffers/table.h +168 -0
  33. xmos_ai_tools/runtime/include/flatbuffers/util.h +690 -0
  34. xmos_ai_tools/runtime/include/flatbuffers/vector.h +370 -0
  35. xmos_ai_tools/runtime/include/flatbuffers/vector_downward.h +271 -0
  36. xmos_ai_tools/runtime/include/flatbuffers/verifier.h +283 -0
  37. xmos_ai_tools/runtime/include/ioserver.h +44 -0
  38. xmos_ai_tools/runtime/include/lib_nn/api/TransposeConv.h +24 -0
  39. xmos_ai_tools/runtime/include/lib_nn/api/add_int16.h +27 -0
  40. xmos_ai_tools/runtime/include/lib_nn/api/add_int16_transform.h +42 -0
  41. xmos_ai_tools/runtime/include/lib_nn/api/dequantize_int16.h +22 -0
  42. xmos_ai_tools/runtime/include/lib_nn/api/dequantize_int16_transform.h +34 -0
  43. xmos_ai_tools/runtime/include/lib_nn/api/expand_8_to_16.h +8 -0
  44. xmos_ai_tools/runtime/include/lib_nn/api/multiply_int16.h +42 -0
  45. xmos_ai_tools/runtime/include/lib_nn/api/multiply_int16_transform.h +71 -0
  46. xmos_ai_tools/runtime/include/lib_nn/api/nn_api.h +15 -0
  47. xmos_ai_tools/runtime/include/lib_nn/api/nn_bin_types.h +14 -0
  48. xmos_ai_tools/runtime/include/lib_nn/api/nn_config.h +287 -0
  49. xmos_ai_tools/runtime/include/lib_nn/api/nn_conv2d_structs.h +72 -0
  50. xmos_ai_tools/runtime/include/lib_nn/api/nn_image.h +26 -0
  51. xmos_ai_tools/runtime/include/lib_nn/api/nn_layers.h +303 -0
  52. xmos_ai_tools/runtime/include/lib_nn/api/nn_op_helper.h +132 -0
  53. xmos_ai_tools/runtime/include/lib_nn/api/nn_op_utils.h +150 -0
  54. xmos_ai_tools/runtime/include/lib_nn/api/nn_operator.h +18 -0
  55. xmos_ai_tools/runtime/include/lib_nn/api/nn_pooling.h +551 -0
  56. xmos_ai_tools/runtime/include/lib_nn/api/nn_types.h +83 -0
  57. xmos_ai_tools/runtime/include/lib_nn/api/nn_window_params.h +55 -0
  58. xmos_ai_tools/runtime/include/lib_nn/api/output_transform_fn_int16.h +54 -0
  59. xmos_ai_tools/runtime/include/lib_nn/api/output_transform_fn_int16_kernel_transform.h +37 -0
  60. xmos_ai_tools/runtime/include/lib_nn/api/output_transform_fn_int16_mappings.h +13 -0
  61. xmos_ai_tools/runtime/include/lib_nn/api/quadratic_approximation.h +82 -0
  62. xmos_ai_tools/runtime/include/lib_nn/api/quadratic_interpolation.h +23 -0
  63. xmos_ai_tools/runtime/include/lib_nn/api/quantize_int16.h +22 -0
  64. xmos_ai_tools/runtime/include/lib_nn/api/quantize_int16_transform.h +33 -0
  65. xmos_ai_tools/runtime/include/lib_nn/api/version.h +13 -0
  66. xmos_ai_tools/runtime/include/lib_nn/api/vpu_memmove_word_aligned.h +15 -0
  67. xmos_ai_tools/runtime/include/lib_nn/api/vpu_memset_256.h +55 -0
  68. xmos_ai_tools/runtime/include/lib_nn/api/vpu_sim.h +118 -0
  69. xmos_ai_tools/runtime/include/lib_nn/api/xs3_vpu.h +216 -0
  70. xmos_ai_tools/runtime/include/lib_nn/api/xs3a_registers.h +2869 -0
  71. xmos_ai_tools/runtime/include/lib_nn/src/asm/asm_constants.h +41 -0
  72. xmos_ai_tools/runtime/include/lib_nn/src/asm/window_op_plan.h +25 -0
  73. xmos_ai_tools/runtime/include/lib_tflite_micro/api/fast_flash.h +47 -0
  74. xmos_ai_tools/runtime/include/lib_tflite_micro/api/inference_engine.h +218 -0
  75. xmos_ai_tools/runtime/include/lib_tflite_micro/api/memory_parallel_transport.h +52 -0
  76. xmos_ai_tools/runtime/include/lib_tflite_micro/api/version.h +13 -0
  77. xmos_ai_tools/runtime/include/lib_tflite_micro/api/xcore_config.h +17 -0
  78. xmos_ai_tools/runtime/include/lib_tflite_micro/api/xcore_device_memory.h +62 -0
  79. xmos_ai_tools/runtime/include/lib_tflite_micro/api/xcore_shared_config.h +31 -0
  80. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/conv2d_float.h +155 -0
  81. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_common.h +19 -0
  82. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_custom_options.h +28 -0
  83. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_error_reporter.h +32 -0
  84. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_interpreter.h +49 -0
  85. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_ops.h +71 -0
  86. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_profiler.h +49 -0
  87. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_utils.h +160 -0
  88. xmos_ai_tools/runtime/include/lib_tflite_micro/src/thread_call.h +119 -0
  89. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/legacy/usb_defs.h +4 -0
  90. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/legacy/usb_device.h +4 -0
  91. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/legacy/usb_std_descriptors.h +4 -0
  92. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/legacy/usb_std_requests.h +4 -0
  93. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/xud.h +518 -0
  94. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/xud_conf_default.h +11 -0
  95. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/xud_device.h +87 -0
  96. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/xud_std_descriptors.h +191 -0
  97. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/xud_std_requests.h +120 -0
  98. xmos_ai_tools/runtime/include/lib_xud/lib_xud/src/user/XUD_USB_Defines.h +70 -0
  99. xmos_ai_tools/runtime/include/lib_xud/lib_xud/src/user/class/hid.h +23 -0
  100. xmos_ai_tools/runtime/include/lib_xud/lib_xud/src/user/class/usbaudio10.h +30 -0
  101. xmos_ai_tools/runtime/include/lib_xud/lib_xud/src/user/class/usbaudio20.h +357 -0
  102. xmos_ai_tools/runtime/include/lib_xud/lib_xud/src/user/class/usbaudiocommon.h +168 -0
  103. xmos_ai_tools/runtime/include/signal/micro/kernels/delay_flexbuffers_generated_data.h +25 -0
  104. xmos_ai_tools/runtime/include/signal/micro/kernels/energy_flexbuffers_generated_data.h +28 -0
  105. xmos_ai_tools/runtime/include/signal/micro/kernels/fft_flexbuffers_generated_data.h +37 -0
  106. xmos_ai_tools/runtime/include/signal/micro/kernels/filter_bank_flexbuffers_generated_data.h +25 -0
  107. xmos_ai_tools/runtime/include/signal/micro/kernels/filter_bank_log_flexbuffers_generated_data.h +27 -0
  108. xmos_ai_tools/runtime/include/signal/micro/kernels/filter_bank_spectral_subtraction_flexbuffers_generated_data.h +26 -0
  109. xmos_ai_tools/runtime/include/signal/micro/kernels/framer_flexbuffers_generated_data.h +25 -0
  110. xmos_ai_tools/runtime/include/signal/micro/kernels/irfft.h +31 -0
  111. xmos_ai_tools/runtime/include/signal/micro/kernels/overlap_add_flexbuffers_generated_data.h +25 -0
  112. xmos_ai_tools/runtime/include/signal/micro/kernels/pcan_flexbuffers_generated_data.h +7 -0
  113. xmos_ai_tools/runtime/include/signal/micro/kernels/rfft.h +31 -0
  114. xmos_ai_tools/runtime/include/signal/micro/kernels/stacker_flexbuffers_generated_data.h +25 -0
  115. xmos_ai_tools/runtime/include/signal/micro/kernels/window_flexbuffers_generated_data.h +25 -0
  116. xmos_ai_tools/runtime/include/signal/src/circular_buffer.h +118 -0
  117. xmos_ai_tools/runtime/include/signal/src/complex.h +29 -0
  118. xmos_ai_tools/runtime/include/signal/src/energy.h +38 -0
  119. xmos_ai_tools/runtime/include/signal/src/fft_auto_scale.h +35 -0
  120. xmos_ai_tools/runtime/include/signal/src/filter_bank.h +69 -0
  121. xmos_ai_tools/runtime/include/signal/src/filter_bank_log.h +38 -0
  122. xmos_ai_tools/runtime/include/signal/src/filter_bank_spectral_subtraction.h +73 -0
  123. xmos_ai_tools/runtime/include/signal/src/filter_bank_square_root.h +34 -0
  124. xmos_ai_tools/runtime/include/signal/src/irfft.h +84 -0
  125. xmos_ai_tools/runtime/include/signal/src/kiss_fft_wrappers/kiss_fft_common.h +49 -0
  126. xmos_ai_tools/runtime/include/signal/src/kiss_fft_wrappers/kiss_fft_float.h +31 -0
  127. xmos_ai_tools/runtime/include/signal/src/kiss_fft_wrappers/kiss_fft_int16.h +30 -0
  128. xmos_ai_tools/runtime/include/signal/src/kiss_fft_wrappers/kiss_fft_int32.h +31 -0
  129. xmos_ai_tools/runtime/include/signal/src/log.h +30 -0
  130. xmos_ai_tools/runtime/include/signal/src/max_abs.h +31 -0
  131. xmos_ai_tools/runtime/include/signal/src/msb.h +32 -0
  132. xmos_ai_tools/runtime/include/signal/src/overlap_add.h +46 -0
  133. xmos_ai_tools/runtime/include/signal/src/pcan_argc_fixed.h +41 -0
  134. xmos_ai_tools/runtime/include/signal/src/rfft.h +85 -0
  135. xmos_ai_tools/runtime/include/signal/src/square_root.h +32 -0
  136. xmos_ai_tools/runtime/include/signal/src/window.h +31 -0
  137. xmos_ai_tools/runtime/include/signal/testdata/fft_test_data.h +48 -0
  138. xmos_ai_tools/runtime/include/tensorflow/lite/array.h +156 -0
  139. xmos_ai_tools/runtime/include/tensorflow/lite/builtin_op_data.h +22 -0
  140. xmos_ai_tools/runtime/include/tensorflow/lite/builtin_ops.h +241 -0
  141. xmos_ai_tools/runtime/include/tensorflow/lite/c/builtin_op_data.h +20 -0
  142. xmos_ai_tools/runtime/include/tensorflow/lite/c/c_api_types.h +26 -0
  143. xmos_ai_tools/runtime/include/tensorflow/lite/c/common.h +30 -0
  144. xmos_ai_tools/runtime/include/tensorflow/lite/context_util.h +54 -0
  145. xmos_ai_tools/runtime/include/tensorflow/lite/core/api/error_reporter.h +72 -0
  146. xmos_ai_tools/runtime/include/tensorflow/lite/core/api/flatbuffer_conversions.h +440 -0
  147. xmos_ai_tools/runtime/include/tensorflow/lite/core/api/tensor_utils.h +28 -0
  148. xmos_ai_tools/runtime/include/tensorflow/lite/core/c/builtin_op_data.h +626 -0
  149. xmos_ai_tools/runtime/include/tensorflow/lite/core/c/c_api_types.h +178 -0
  150. xmos_ai_tools/runtime/include/tensorflow/lite/core/c/common.h +1496 -0
  151. xmos_ai_tools/runtime/include/tensorflow/lite/core/macros.h +78 -0
  152. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/bits.h +102 -0
  153. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/fft.h +50 -0
  154. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/fft_io.h +34 -0
  155. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/fft_util.h +34 -0
  156. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/filterbank.h +63 -0
  157. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/filterbank_io.h +35 -0
  158. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/filterbank_util.h +50 -0
  159. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/frontend.h +64 -0
  160. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/frontend_io.h +31 -0
  161. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/frontend_util.h +52 -0
  162. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/kiss_fft_common.h +48 -0
  163. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/kiss_fft_int16.h +33 -0
  164. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/log_lut.h +40 -0
  165. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/log_scale.h +39 -0
  166. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/log_scale_io.h +33 -0
  167. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/log_scale_util.h +45 -0
  168. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h +46 -0
  169. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_io.h +36 -0
  170. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.h +50 -0
  171. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h +47 -0
  172. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.h +57 -0
  173. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/window.h +49 -0
  174. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/window_io.h +34 -0
  175. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/window_util.h +45 -0
  176. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/common.h +1358 -0
  177. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/compatibility.h +122 -0
  178. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/cppmath.h +40 -0
  179. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/max.h +35 -0
  180. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/min.h +35 -0
  181. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/optimized/neon_check.h +20 -0
  182. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/portable_tensor.h +141 -0
  183. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/portable_tensor_utils.h +623 -0
  184. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/quantization_util.h +292 -0
  185. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/add.h +561 -0
  186. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/add_n.h +86 -0
  187. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/arg_min_max.h +88 -0
  188. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/batch_matmul.h +275 -0
  189. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h +101 -0
  190. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/binary_function.h +91 -0
  191. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/broadcast_args.h +56 -0
  192. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/broadcast_to.h +97 -0
  193. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/ceil.h +37 -0
  194. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/comparisons.h +271 -0
  195. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/concatenation.h +141 -0
  196. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/conv.h +289 -0
  197. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/cumsum.h +175 -0
  198. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/depth_to_space.h +79 -0
  199. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h +100 -0
  200. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h +319 -0
  201. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/dequantize.h +78 -0
  202. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/div.h +247 -0
  203. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/elu.h +37 -0
  204. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/exp.h +38 -0
  205. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/fill.h +38 -0
  206. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/floor.h +39 -0
  207. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/floor_div.h +35 -0
  208. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/floor_mod.h +44 -0
  209. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/fully_connected.h +323 -0
  210. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/hard_swish.h +168 -0
  211. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/add.h +250 -0
  212. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h +241 -0
  213. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h +291 -0
  214. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h +126 -0
  215. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h +67 -0
  216. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h +121 -0
  217. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h +18 -0
  218. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h +194 -0
  219. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h +264 -0
  220. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h +117 -0
  221. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h +224 -0
  222. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/l2normalization.h +90 -0
  223. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/leaky_relu.h +69 -0
  224. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/log_softmax.h +256 -0
  225. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/logistic.h +132 -0
  226. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/lstm_cell.h +422 -0
  227. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/maximum_minimum.h +64 -0
  228. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/mul.h +267 -0
  229. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/neg.h +37 -0
  230. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/pad.h +169 -0
  231. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/pooling.h +303 -0
  232. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.h +333 -0
  233. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h +244 -0
  234. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/prelu.h +111 -0
  235. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h +140 -0
  236. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/quantize.h +89 -0
  237. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/reduce.h +491 -0
  238. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/requantize.h +70 -0
  239. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/resize_bilinear.h +233 -0
  240. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h +102 -0
  241. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/round.h +51 -0
  242. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/select.h +151 -0
  243. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/slice.h +80 -0
  244. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/softmax.h +233 -0
  245. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h +109 -0
  246. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/space_to_depth.h +80 -0
  247. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/strided_slice.h +147 -0
  248. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/sub.h +465 -0
  249. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/tanh.h +129 -0
  250. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/transpose.h +203 -0
  251. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/transpose_conv.h +225 -0
  252. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/runtime_shape.h +168 -0
  253. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/strided_slice_logic.h +278 -0
  254. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/tensor_ctypes.h +42 -0
  255. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/types.h +1096 -0
  256. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/kernel_util.h +341 -0
  257. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/op_macros.h +49 -0
  258. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/padding.h +115 -0
  259. xmos_ai_tools/runtime/include/tensorflow/lite/micro/arena_allocator/ibuffer_allocator.h +100 -0
  260. xmos_ai_tools/runtime/include/tensorflow/lite/micro/arena_allocator/non_persistent_arena_buffer_allocator.h +104 -0
  261. xmos_ai_tools/runtime/include/tensorflow/lite/micro/arena_allocator/persistent_arena_buffer_allocator.h +58 -0
  262. xmos_ai_tools/runtime/include/tensorflow/lite/micro/arena_allocator/recording_single_arena_buffer_allocator.h +63 -0
  263. xmos_ai_tools/runtime/include/tensorflow/lite/micro/arena_allocator/single_arena_buffer_allocator.h +144 -0
  264. xmos_ai_tools/runtime/include/tensorflow/lite/micro/benchmarks/micro_benchmark.h +95 -0
  265. xmos_ai_tools/runtime/include/tensorflow/lite/micro/compatibility.h +32 -0
  266. xmos_ai_tools/runtime/include/tensorflow/lite/micro/cortex_m_generic/debug_log_callback.h +49 -0
  267. xmos_ai_tools/runtime/include/tensorflow/lite/micro/debug_log.h +38 -0
  268. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/micro_speech/micro_model_settings.h +37 -0
  269. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/network_tester/expected_output_data.h +47 -0
  270. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/network_tester/input_data.h +108 -0
  271. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/network_tester/network_model.h +166 -0
  272. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/person_detection/detection_responder.h +32 -0
  273. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/person_detection/image_provider.h +38 -0
  274. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/person_detection/main_functions.h +37 -0
  275. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/person_detection/model_settings.h +35 -0
  276. xmos_ai_tools/runtime/include/tensorflow/lite/micro/fake_micro_context.h +70 -0
  277. xmos_ai_tools/runtime/include/tensorflow/lite/micro/flatbuffer_utils.h +65 -0
  278. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/activation_utils.h +57 -0
  279. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/activations.h +64 -0
  280. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/add.h +78 -0
  281. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/mli_function_specializations.h +141 -0
  282. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/mli_interface.h +75 -0
  283. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/mli_slicers.h +56 -0
  284. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/mli_tf_utils.h +310 -0
  285. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.h +145 -0
  286. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.h +78 -0
  287. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/ceva/ceva_common.h +24 -0
  288. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/ceva/ceva_tflm_lib.h +613 -0
  289. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/ceva/mcps_macros.h +115 -0
  290. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/ceva/types.h +1286 -0
  291. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/circular_buffer.h +45 -0
  292. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/circular_buffer_flexbuffers_generated_data.h +22 -0
  293. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/conv.h +117 -0
  294. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/conv_test.h +94 -0
  295. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/depthwise_conv.h +80 -0
  296. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/dequantize.h +38 -0
  297. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/detection_postprocess_flexbuffers_generated_data.h +25 -0
  298. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/ethosu.h +28 -0
  299. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/fully_connected.h +112 -0
  300. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/hard_swish.h +30 -0
  301. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/kernel_runner.h +86 -0
  302. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/kernel_util.h +150 -0
  303. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/leaky_relu.h +43 -0
  304. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/logical.h +35 -0
  305. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/logistic.h +42 -0
  306. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/lstm_eval.h +541 -0
  307. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/lstm_eval_test.h +817 -0
  308. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/lstm_shared.h +150 -0
  309. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/micro_ops.h +158 -0
  310. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/micro_tensor_utils.h +56 -0
  311. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/mul.h +74 -0
  312. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/pad.h +27 -0
  313. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/pooling.h +142 -0
  314. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/prelu.h +39 -0
  315. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/quantize.h +37 -0
  316. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/reduce.h +65 -0
  317. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/reshape.h +26 -0
  318. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/softmax.h +67 -0
  319. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/strided_slice.h +40 -0
  320. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/sub.h +60 -0
  321. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/svdf.h +100 -0
  322. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/testdata/conv_test_data.h +37 -0
  323. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/testdata/lstm_test_data.h +579 -0
  324. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.h +47 -0
  325. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/hifimini/fixedpoint_utils.h +139 -0
  326. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/lstm_eval.h +216 -0
  327. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/lstm_shared.h +78 -0
  328. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa.h +38 -0
  329. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_add.h +48 -0
  330. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_conv.h +89 -0
  331. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_depthwise_conv.h +74 -0
  332. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_fully_connected.h +78 -0
  333. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_pad.h +49 -0
  334. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_pooling.h +76 -0
  335. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_reduce.h +47 -0
  336. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_reshape.h +44 -0
  337. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_softmax.h +58 -0
  338. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_svdf.h +39 -0
  339. xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_helpers.h +64 -0
  340. xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h +170 -0
  341. xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/linear_memory_planner.h +53 -0
  342. xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/memory_plan_struct.h +73 -0
  343. xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/micro_memory_planner.h +95 -0
  344. xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/non_persistent_buffer_planner_shim.h +133 -0
  345. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_allocation_info.h +138 -0
  346. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_allocator.h +351 -0
  347. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_arena_constants.h +28 -0
  348. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_common.h +38 -0
  349. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_context.h +176 -0
  350. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_graph.h +79 -0
  351. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_interpreter.h +189 -0
  352. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_interpreter_context.h +125 -0
  353. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_interpreter_graph.h +110 -0
  354. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_log.h +42 -0
  355. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_mutable_op_resolver.h +708 -0
  356. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_op_resolver.h +62 -0
  357. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_profiler.h +140 -0
  358. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_profiler_interface.h +38 -0
  359. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_resource_variable.h +89 -0
  360. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_time.h +36 -0
  361. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_utils.h +162 -0
  362. xmos_ai_tools/runtime/include/tensorflow/lite/micro/mock_micro_graph.h +60 -0
  363. xmos_ai_tools/runtime/include/tensorflow/lite/micro/python/interpreter/src/python_ops_resolver.h +21 -0
  364. xmos_ai_tools/runtime/include/tensorflow/lite/micro/python/tflite_size/src/flatbuffer_size.h +30 -0
  365. xmos_ai_tools/runtime/include/tensorflow/lite/micro/python/tflite_size/src/flatbuffer_size_wrapper.h +33 -0
  366. xmos_ai_tools/runtime/include/tensorflow/lite/micro/recording_micro_allocator.h +125 -0
  367. xmos_ai_tools/runtime/include/tensorflow/lite/micro/recording_micro_interpreter.h +69 -0
  368. xmos_ai_tools/runtime/include/tensorflow/lite/micro/system_setup.h +27 -0
  369. xmos_ai_tools/runtime/include/tensorflow/lite/micro/test_helper_custom_ops.h +49 -0
  370. xmos_ai_tools/runtime/include/tensorflow/lite/micro/test_helpers.h +334 -0
  371. xmos_ai_tools/runtime/include/tensorflow/lite/micro/testing/micro_test.h +267 -0
  372. xmos_ai_tools/runtime/include/tensorflow/lite/micro/testing/test_conv_model.h +23 -0
  373. xmos_ai_tools/runtime/include/tensorflow/lite/micro/tflite_bridge/flatbuffer_conversions_bridge.h +45 -0
  374. xmos_ai_tools/runtime/include/tensorflow/lite/micro/tflite_bridge/micro_error_reporter.h +36 -0
  375. xmos_ai_tools/runtime/include/tensorflow/lite/micro/tools/benchmarking/log_utils.h +273 -0
  376. xmos_ai_tools/runtime/include/tensorflow/lite/micro/tools/benchmarking/metrics.h +41 -0
  377. xmos_ai_tools/runtime/include/tensorflow/lite/micro/tools/benchmarking/op_resolver.h +127 -0
  378. xmos_ai_tools/runtime/include/tensorflow/lite/portable_type_to_tflitetype.h +75 -0
  379. xmos_ai_tools/runtime/include/tensorflow/lite/schema/schema_generated.h +24644 -0
  380. xmos_ai_tools/runtime/include/tensorflow/lite/schema/schema_utils.h +33 -0
  381. xmos_ai_tools/runtime/include/tile_ram_server.h +38 -0
  382. xmos_ai_tools/runtime/lib/libhost_xtflitemicro.a +0 -0
  383. xmos_ai_tools/runtime/lib/libxtflitemicro.a +0 -0
  384. xmos_ai_tools/xformer/__init__.py +60 -0
  385. xmos_ai_tools/xformer/flash.py +190 -0
  386. xmos_ai_tools/xinterpreters/__init__.py +1 -0
  387. xmos_ai_tools/xinterpreters/exceptions.py +38 -0
  388. xmos_ai_tools/xinterpreters/host_interpreter.py +652 -0
  389. xmos_ai_tools/xinterpreters/libs/macos/xtflm_python.1.0.1.dylib +0 -0
  390. xmos_ai_tools/xinterpreters/libs/macos/xtflm_python.dylib +0 -0
  391. xmos_ai_tools-1.3.2.dev80.data/data/bin/xcore-opt +0 -0
  392. xmos_ai_tools-1.3.2.dev80.dist-info/METADATA +33 -0
  393. xmos_ai_tools-1.3.2.dev80.dist-info/RECORD +395 -0
  394. xmos_ai_tools-1.3.2.dev80.dist-info/WHEEL +5 -0
  395. xmos_ai_tools-1.3.2.dev80.dist-info/top_level.txt +1 -0
@@ -0,0 +1,168 @@
1
+ /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+ #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_HARD_SWISH_H_
16
+ #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_HARD_SWISH_H_
17
+
18
+ #include <algorithm>
19
+
20
+ #include "ruy/profiler/instrumentation.h" // from @ruy
21
+ #include "tensorflow/lite/kernels/internal/common.h"
22
+ #include "tensorflow/lite/kernels/internal/types.h"
23
+
24
+ namespace tflite_micro {
25
+ namespace reference_ops {
26
+
27
// Returns `value * 2^amount`, clamped to the int16_t range.
//
// `amount` must be non-negative. Any amount of 16 or more is treated as 16:
// every nonzero int16_t already saturates when multiplied by 2^16 and zero
// stays zero, so the result is the same while the shift itself stays
// well-defined. (Building the factor as the `int` expression `1 << amount`
// overflows for amount >= 31 and can flip the sign of the result.)
inline int16_t SaturatingLeftShift(int16_t value, int amount) {
  constexpr int kMaxUsefulShift = 16;
  const int bounded_amount = std::min(amount, kMaxUsefulShift);
  // Form the product in 64 bits so it cannot overflow before clamping.
  int64_t result =
      static_cast<int64_t>(value) * (int64_t{1} << bounded_amount);
  result = std::min<int64_t>(result, std::numeric_limits<int16_t>::max());
  result = std::max<int64_t>(result, std::numeric_limits<int16_t>::min());
  return static_cast<int16_t>(result);
}
33
+
34
// Doubling high multiply with saturation, in the spirit of ARM's SQDMULH:
// returns the high 16 bits of 2*a*b. Unlike
// gemmlowp::SaturatingRoundingDoublingHighMul (SQRDMULH), the result is
// truncated toward zero rather than rounded to nearest.
inline std::int16_t SaturatingDoublingHighMul(std::int16_t a, std::int16_t b) {
  using Limits = std::numeric_limits<std::int16_t>;
  // (-32768) * (-32768) is the only product whose doubled high half does not
  // fit in int16_t, so it is the only case that saturates.
  if (a == Limits::min() && b == Limits::min()) {
    return Limits::max();
  }
  const std::int32_t product =
      static_cast<std::int32_t>(a) * static_cast<std::int32_t>(b);
  // Dividing (not shifting) by 2^15 truncates toward zero for negative
  // products as well.
  return static_cast<std::int16_t>(product / (1 << 15));
}
45
+
46
+ template <typename T>
47
+ inline void HardSwish(const RuntimeShape& input_shape, const T* input_data,
48
+ const RuntimeShape& output_shape, T* output_data) {
49
+ ruy::profiler::ScopeLabel label("ReferenceHardSwish/Float");
50
+ auto matching_size = MatchingFlatSize(input_shape, output_shape);
51
+ const T* in_end = input_data + matching_size;
52
+ for (; input_data < in_end; input_data++, output_data++) {
53
+ const float in = *input_data;
54
+ *output_data =
55
+ in * std::min(static_cast<T>(6), std::max(static_cast<T>(0), in + 3)) /
56
+ 6;
57
+ }
58
+ }
59
+
60
+ template <typename T>
61
+ inline void HardSwish(const HardSwishParams& params,
62
+ const RuntimeShape& input_shape, const T* input_data,
63
+ const RuntimeShape& output_shape, T* output_data) {
64
+ ruy::profiler::ScopeLabel label("ReferenceHardSwish/Quantized");
65
+
66
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
67
+
68
+ for (int i = 0; i < flat_size; i++) {
69
+ const int16_t input_value = input_data[i] - params.input_zero_point;
70
+ // Left-shift as much as we can without overflow/saturation to put
71
+ // significant bits in the high bits of our 16-bit fixedpoint values, so
72
+ // that fixed-point approximate computations below are as accurate as
73
+ // possible.
74
+ const int16_t input_value_on_hires_input_scale = input_value * (1 << 7);
75
+ // Compute the input value on essentially the output scale, just not
76
+ // right-shifted yet. This is the value that we'll use in the (x >= +3)
77
+ // case, and that in the general case we'll multiply against the "relu-ish"
78
+ // fixed-point multiplier in [0, 1].
79
+ const int16_t input_value_on_preshift_output_scale =
80
+ gemmlowp::SaturatingRoundingDoublingHighMul(
81
+ input_value_on_hires_input_scale,
82
+ params.output_multiplier_fixedpoint_int16);
83
+ // Now compute the "relu-ish multiplier". In the (-3 <= x <= +3) case, that
84
+ // is just an affine rescaling of x from [-3, 3] to [0, 1]. In the general
85
+ // case, it is just that plus saturation at the boundaries of [-3, 3].
86
+ // First, we rescale from [-3, 3] to [-1, 1], saturating.
87
+ // That is done by rescaling the input value with a fixed-point multiplier
88
+ // (reluish_multiplier_fixedpoint) and bit-shift such that we represent
89
+ // that input value on the scale where the real value 3.0f is represented
90
+ // by the quantized value 32768. (+32768 is actually not representable as
91
+ // int16_t, so this saturates at +32767, and that is seen empirically to be
92
+ // a negligible contribution to numerical error/bias).
93
+ //
94
+ // This code is careful to correctly implement any magnitude of multiplier,
95
+ // involving either a right shift or a left shift, with correct saturation
96
+ // behavior in the left-shift case. This forces this code to be more
97
+ // complicated, but is necessary for real applications: a partially
98
+ // trained quantized MobileNet v3-small model that motivated this code
99
+ // exhibits some large [min, max] range boundaries, of the order of
100
+ // magnitude of 10 or 100 depending on layers.
101
+ //
102
+ // The next few lines are basically just an ordinary
103
+ // MultiplyByQuantizedMultiplier, except that we are more careful here
104
+ // about the fine details of saturation when left-shifting, because here
105
+ // overflow in left-shift is a common case, not an anomaly as
106
+ // MultiplyByQuantizedMultiplier assumes.
107
+ int16_t reluish_value = input_value_on_hires_input_scale;
108
+ // Shift left, saturating, as much as we can while ensuring that this
109
+ // saturation will not contribute to the result. That is, left shift amount
110
+ // reduced by 1.
111
+ if (params.reluish_multiplier_exponent > 0) {
112
+ reluish_value = SaturatingLeftShift(
113
+ reluish_value, params.reluish_multiplier_exponent - 1);
114
+ }
115
+ // Apply the fixed-point multiplier, dividing the value by a divisor
116
+ // ranging in [1, 2].
117
+ reluish_value = gemmlowp::SaturatingRoundingDoublingHighMul(
118
+ reluish_value, params.reluish_multiplier_fixedpoint_int16);
119
+ // Apply the last bit of left-shift. Thus, in the left-shifting case, if
120
+ // any saturation affects the result, it is happening here --- any
121
+ // saturation having occurred above is overwritten here, not affecting the
122
+ // result.
123
+ if (params.reluish_multiplier_exponent > 0) {
124
+ reluish_value = SaturatingLeftShift(reluish_value, 1);
125
+ }
126
+ // Shift right, in the right-shifting case.
127
+ if (params.reluish_multiplier_exponent < 0) {
128
+ reluish_value = gemmlowp::RoundingDivideByPOT(
129
+ reluish_value, -params.reluish_multiplier_exponent);
130
+ }
131
+ // At this point we have rescaled the value into a 16bit fixedpoint
132
+ // reluish_value in [-1, 1].
133
+ // We now convert that to a 16bit fixedpoint value in [0, 1].
134
+ reluish_value = (reluish_value + (1 << 15)) >> 1;
135
+ // Use of SaturatingDoublingHighMul here is important to cancel the biases
136
+ // from the above SaturatingRoundingDoublingHighMul.
137
+ //
138
+ // On a partially trained MobileNet-v3-small,
139
+ //
140
+ // | bias on | ImageNet
141
+ // | quantized | Top-1
142
+ // Operation used here | values | accuracy (50k)
143
+ // --------------------------------------+------------+-----------
144
+ // SaturatingDoublingHighMul | -0.0024 | 58.920
145
+ // SaturatingRoundingDoublingHighMul | -0.0067 | 58.064
146
+ //
147
+ // In activations_test, this is covered by this testcase:
148
+ // QuantizedActivationsOpTest.HardSwishBias
149
+ //
150
+ const int16_t preshift_output_value = SaturatingDoublingHighMul(
151
+ reluish_value, input_value_on_preshift_output_scale);
152
+ // We were so far operating on the pre-shift output scale. Now we finally
153
+ // apply that output shift, arriving at the final output scale.
154
+ int16_t output_value = gemmlowp::RoundingDivideByPOT(
155
+ preshift_output_value, -params.output_multiplier_exponent);
156
+ output_value += params.output_zero_point;
157
+ output_value =
158
+ std::min<int16_t>(output_value, std::numeric_limits<T>::max());
159
+ output_value =
160
+ std::max<int16_t>(output_value, std::numeric_limits<T>::min());
161
+ output_data[i] = output_value;
162
+ }
163
+ }
164
+
165
+ } // namespace reference_ops
166
+ } // namespace tflite_micro
167
+
168
+ #endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_HARD_SWISH_H_
@@ -0,0 +1,250 @@
1
+ /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+ #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
16
+ #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
17
+
18
+ #include <algorithm>
19
+ #include <cstddef>
20
+ #include <limits>
21
+
22
+ #include "tensorflow/lite/kernels/internal/common.h"
23
+ #include "tensorflow/lite/kernels/internal/types.h"
24
+
25
+ namespace tflite_micro {
26
+ namespace reference_integer_ops {
27
+
28
+ inline void CheckArithmeticParams(const ArithmeticParams& params) {
29
+ TFLITE_DCHECK_LE(params.quantized_activation_min,
30
+ params.quantized_activation_max);
31
+ // Input offset is negative input zero point. Activation tensors are
32
+ // asymmetric quantized so they span the full int8 range.
33
+ TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits<int8_t>::min());
34
+ TFLITE_DCHECK_GE(-params.input2_offset, std::numeric_limits<int8_t>::min());
35
+ TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits<int8_t>::max());
36
+ TFLITE_DCHECK_LE(-params.input2_offset, std::numeric_limits<int8_t>::max());
37
+ }
38
+
39
+ // TODO: b/270589088 - move to a more appropriate file (b/270589088#comment2)
40
+ template <typename T>
41
+ void BroadcastInput1(int size, const ArithmeticParams& params,
42
+ const T* input1_data, const T* input2_data, T* output_data,
43
+ void (*check_arithmetic_params)(const ArithmeticParams&),
44
+ T (*binary_func)(T, T, const ArithmeticParams&)) {
45
+ CheckArithmeticParams(params);
46
+ for (int i = 0; i < size; ++i) {
47
+ output_data[i] = binary_func(input1_data[0], input2_data[i], params);
48
+ }
49
+ }
50
+
51
+ template <typename T>
52
+ void BroadcastInput2(int size, const ArithmeticParams& params,
53
+ const T* input1_data, const T* input2_data, T* output_data,
54
+ void (*check_arithmetic_params)(const ArithmeticParams&),
55
+ T (*binary_func)(T, T, const ArithmeticParams&)) {
56
+ CheckArithmeticParams(params);
57
+ for (int i = 0; i < size; ++i) {
58
+ output_data[i] = binary_func(input1_data[i], input2_data[0], params);
59
+ }
60
+ }
61
+
62
+ // TODO: b/270589088 - move to a more appropriate file (b/270589088#comment2)
63
+ template <typename T>
64
+ void ElementWise(int size, const ArithmeticParams& params, const T* input1_data,
65
+ const T* input2_data, T* output_data,
66
+ void (*check_arithmetic_params)(const ArithmeticParams&),
67
+ T (*binary_func)(T, T, const ArithmeticParams&)) {
68
+ CheckArithmeticParams(params);
69
+ for (int i = 0; i < size; ++i) {
70
+ output_data[i] = binary_func(input1_data[i], input2_data[i], params);
71
+ }
72
+ }
73
+
74
+ template <typename T>
75
+ inline void BroadcastAddRecursiveDimensions(
76
+ const ArithmeticParams& params, int dimension, size_t* input1_offset_p,
77
+ size_t* input2_offset_p, size_t* output_offset,
78
+ size_t* compressed_input1_stride, size_t* compressed_input2_stride,
79
+ size_t* compressed_output_shape, const T* input1_data, const T* input2_data,
80
+ T* output_data, void (*check_arithmetic_params)(const ArithmeticParams&),
81
+ T (*binary_func)(T, T, const ArithmeticParams&)) {
82
+ if (dimension > 0) {
83
+ for (size_t c = 0; c < compressed_output_shape[dimension]; ++c) {
84
+ size_t input1_offset_c = *input1_offset_p;
85
+ size_t input2_offset_c = *input2_offset_p;
86
+ BroadcastAddRecursiveDimensions(
87
+ params, dimension - 1, &input1_offset_c, &input2_offset_c,
88
+ output_offset, compressed_input1_stride, compressed_input2_stride,
89
+ compressed_output_shape, input1_data, input2_data, output_data,
90
+ check_arithmetic_params, binary_func);
91
+ *input1_offset_p += compressed_input1_stride[dimension];
92
+ *input2_offset_p += compressed_input2_stride[dimension];
93
+ }
94
+ } else {
95
+ TFLITE_DCHECK(dimension == 0);
96
+ bool input1_is_broadcast = compressed_input1_stride[dimension] == 0;
97
+ bool input2_is_broadcast = compressed_input2_stride[dimension] == 0;
98
+ TFLITE_DCHECK(!(input1_is_broadcast && input2_is_broadcast));
99
+ const T* input1_data_ptr = input1_data + *input1_offset_p;
100
+ const T* input2_data_ptr = input2_data + *input2_offset_p;
101
+ T* output_data_ptr = output_data + *output_offset;
102
+ if (input1_is_broadcast) {
103
+ // input1 is broadcast.
104
+ BroadcastInput1<T>(compressed_output_shape[dimension], params,
105
+ input1_data_ptr, input2_data_ptr, output_data_ptr,
106
+ check_arithmetic_params, binary_func);
107
+ *input2_offset_p += compressed_output_shape[dimension];
108
+ } else if (input2_is_broadcast) {
109
+ // input2 is broadcast.
110
+ BroadcastInput2<T>(compressed_output_shape[dimension], params,
111
+ input1_data_ptr, input2_data_ptr, output_data_ptr,
112
+ check_arithmetic_params, binary_func);
113
+ *input1_offset_p += compressed_output_shape[dimension];
114
+ } else {
115
+ // Add element-wise.
116
+ ElementWise<T>(compressed_output_shape[dimension], params,
117
+ input1_data_ptr, input2_data_ptr, output_data_ptr,
118
+ check_arithmetic_params, binary_func);
119
+ *input1_offset_p += compressed_output_shape[dimension];
120
+ *input2_offset_p += compressed_output_shape[dimension];
121
+ }
122
+ *output_offset += compressed_output_shape[dimension];
123
+ }
124
+ }
125
+
126
+ // TODO: b/270589088 - move to a more appropriate file. (b/270589088#comment2)
127
+ template <typename T>
128
+ void BroadcastBinaryFunction6DSlow(
129
+ const ArithmeticParams& params, const RuntimeShape& input1_shape,
130
+ const T* input1_data, const RuntimeShape& input2_shape,
131
+ const T* input2_data, const RuntimeShape& output_shape, T* output_data,
132
+ void (*check_arithmetic_params)(const ArithmeticParams&),
133
+ T (*binary_func)(T, T, const ArithmeticParams&)) {
134
+ constexpr int kMaxBroadcastDim = 6;
135
+
136
+ // In Tensorflow, the dimensions are canonically named (batch_number, row,
137
+ // col, channel), with extents (batches, height, width, depth), with the
138
+ // trailing dimension changing most rapidly (channels has the smallest stride,
139
+ // typically 1 element).
140
+ //
141
+ // In generated C code, we store arrays with the dimensions reversed. The
142
+ // first dimension has smallest stride.
143
+ //
144
+ // We name our variables by their Tensorflow convention, but generate C code
145
+ // nesting loops such that the innermost loop has the smallest stride for the
146
+ // best cache behavior.
147
+ size_t compressed_input1_stride[kMaxBroadcastDim];
148
+ size_t compressed_input2_stride[kMaxBroadcastDim];
149
+ size_t compressed_output_shape[kMaxBroadcastDim];
150
+ bool broadcastable_shape = ReduceDimensionsForBroadcast<kMaxBroadcastDim>(
151
+ input1_shape, input2_shape, compressed_input1_stride,
152
+ compressed_input2_stride, compressed_output_shape);
153
+ // Skip broadcasting for degenerate shapes.
154
+ if (!broadcastable_shape) {
155
+ return;
156
+ }
157
+
158
+ size_t input1_offset = 0;
159
+ size_t input2_offset = 0;
160
+ size_t output_offset = 0;
161
+ BroadcastAddRecursiveDimensions(
162
+ params, kMaxBroadcastDim - 1, &input1_offset, &input2_offset,
163
+ &output_offset, compressed_input1_stride, compressed_input2_stride,
164
+ compressed_output_shape, input1_data, input2_data, output_data,
165
+ check_arithmetic_params, binary_func);
166
+ }
167
+
168
+ template <typename T>
169
+ void BroadcastBinaryFunction4DSlow(
170
+ const ArithmeticParams& params, const RuntimeShape& input1_shape,
171
+ const T* input1_data, const RuntimeShape& input2_shape,
172
+ const T* input2_data, const RuntimeShape& output_shape, T* output_data,
173
+ void (*check_arithmetic_params)(const ArithmeticParams&),
174
+ T (*binary_func)(T, T, const ArithmeticParams&)) {
175
+ BroadcastBinaryFunction6DSlow(params, input1_shape, input1_data, input2_shape,
176
+ input2_data, output_shape, output_data,
177
+ check_arithmetic_params, binary_func);
178
+ }
179
+
180
+ inline int8_t AddFunc(int8_t x, int8_t y, const ArithmeticParams& params) {
181
+ const int32_t input1_val = params.input1_offset + x;
182
+ const int32_t input2_val = params.input2_offset + y;
183
+ const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
184
+ const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
185
+ const int32_t scaled_input1_val =
186
+ MultiplyByQuantizedMultiplierSmallerThanOneExp(
187
+ shifted_input1_val, params.input1_multiplier, params.input1_shift);
188
+ const int32_t scaled_input2_val =
189
+ MultiplyByQuantizedMultiplierSmallerThanOneExp(
190
+ shifted_input2_val, params.input2_multiplier, params.input2_shift);
191
+ const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
192
+ const int32_t raw_output =
193
+ MultiplyByQuantizedMultiplierSmallerThanOneExp(
194
+ raw_sum, params.output_multiplier, params.output_shift) +
195
+ params.output_offset;
196
+ const int32_t clamped_output =
197
+ std::min(params.quantized_activation_max,
198
+ std::max(params.quantized_activation_min, raw_output));
199
+ return static_cast<int8_t>(clamped_output);
200
+ }
201
+
202
+ // Element-wise add that can often be used for inner loop of broadcast add as
203
+ // well as the non-broadcast add.
204
+ inline void AddElementwise(int size, const ArithmeticParams& params,
205
+ const int8_t* input1_data, const int8_t* input2_data,
206
+ int8_t* output_data) {
207
+ ElementWise(size, params, input1_data, input2_data, output_data,
208
+ CheckArithmeticParams, AddFunc);
209
+ }
210
+
211
+ inline void Add(const ArithmeticParams& params,
212
+ const RuntimeShape& input1_shape, const int8_t* input1_data,
213
+ const RuntimeShape& input2_shape, const int8_t* input2_data,
214
+ const RuntimeShape& output_shape, int8_t* output_data) {
215
+ CheckArithmeticParams(params);
216
+
217
+ const int flat_size =
218
+ MatchingElementsSize(input1_shape, input2_shape, output_shape);
219
+
220
+ AddElementwise(flat_size, params, input1_data, input2_data, output_data);
221
+ }
222
+
223
+ inline void BroadcastAdd6DSlow(const ArithmeticParams& params,
224
+ const RuntimeShape& input1_shape,
225
+ const int8_t* input1_data,
226
+ const RuntimeShape& input2_shape,
227
+ const int8_t* input2_data,
228
+ const RuntimeShape& output_shape,
229
+ int8_t* output_data) {
230
+ BroadcastBinaryFunction6DSlow(params, input1_shape, input1_data, input2_shape,
231
+ input2_data, output_shape, output_data,
232
+ CheckArithmeticParams, AddFunc);
233
+ }
234
+
235
+ inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
236
+ const RuntimeShape& input1_shape,
237
+ const int8_t* input1_data,
238
+ const RuntimeShape& input2_shape,
239
+ const int8_t* input2_data,
240
+ const RuntimeShape& output_shape,
241
+ int8_t* output_data) {
242
+ BroadcastBinaryFunction6DSlow(params, input1_shape, input1_data, input2_shape,
243
+ input2_data, output_shape, output_data,
244
+ CheckArithmeticParams, AddFunc);
245
+ }
246
+
247
+ } // namespace reference_integer_ops
248
+ } // namespace tflite_micro
249
+
250
+ #endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
@@ -0,0 +1,241 @@
1
+ /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+ #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
16
+ #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
17
+
18
+ #include <algorithm>
19
+
20
+ #include "tensorflow/lite/kernels/internal/common.h"
21
+
22
+ namespace tflite_micro {
23
+ namespace reference_integer_ops {
24
+
25
+ // Fixed-point per-channel-quantization convolution reference kernel.
26
+ inline void ConvPerChannel(
27
+ const ConvParams& params, const int32_t* output_multiplier,
28
+ const int32_t* output_shift, const RuntimeShape& input_shape,
29
+ const int8_t* input_data, const RuntimeShape& filter_shape,
30
+ const int8_t* filter_data, const RuntimeShape& bias_shape,
31
+ const int32_t* bias_data, const RuntimeShape& output_shape,
32
+ int8_t* output_data) {
33
+ // Get parameters.
34
+ const int32_t input_offset = params.input_offset; // r = s(q - Z)
35
+ const int stride_width = params.stride_width;
36
+ const int stride_height = params.stride_height;
37
+ const int dilation_width_factor = params.dilation_width_factor;
38
+ const int dilation_height_factor = params.dilation_height_factor;
39
+ const int pad_width = params.padding_values.width;
40
+ const int pad_height = params.padding_values.height;
41
+ const int32_t output_offset = params.output_offset;
42
+
43
+ // Set min and max value of the output.
44
+ const int32_t output_activation_min = params.quantized_activation_min;
45
+ const int32_t output_activation_max = params.quantized_activation_max;
46
+
47
+ // Consistency check.
48
+ TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
49
+ TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
50
+ TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
51
+ TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
52
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
53
+ const int input_depth = input_shape.Dims(3);
54
+ const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
55
+ if (bias_data) {
56
+ TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
57
+ }
58
+
59
+ // Check dimensions of the tensors.
60
+ const int input_height = input_shape.Dims(1);
61
+ const int input_width = input_shape.Dims(2);
62
+ const int filter_height = filter_shape.Dims(1);
63
+ const int filter_width = filter_shape.Dims(2);
64
+ const int filter_input_depth = filter_shape.Dims(3);
65
+ const int groups = input_depth / filter_input_depth;
66
+ TFLITE_DCHECK_NE(groups, 0);
67
+ TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
68
+ const int filters_per_group = output_depth / groups;
69
+ TFLITE_DCHECK_NE(filters_per_group, 0);
70
+ const int output_height = output_shape.Dims(1);
71
+ const int output_width = output_shape.Dims(2);
72
+ for (int batch = 0; batch < batches; ++batch) {
73
+ for (int out_y = 0; out_y < output_height; ++out_y) {
74
+ const int in_y_origin = (out_y * stride_height) - pad_height;
75
+ for (int out_x = 0; out_x < output_width; ++out_x) {
76
+ const int in_x_origin = (out_x * stride_width) - pad_width;
77
+ for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
78
+ auto group = out_channel / filters_per_group;
79
+ int32_t acc = 0;
80
+ for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
81
+ const int in_y = in_y_origin + dilation_height_factor * filter_y;
82
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
83
+ const int in_x = in_x_origin + dilation_width_factor * filter_x;
84
+
85
+ // Zero padding by omitting the areas outside the image.
86
+ const bool is_point_inside_image =
87
+ (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
88
+ (in_y < input_height);
89
+
90
+ if (!is_point_inside_image) {
91
+ continue;
92
+ }
93
+
94
+ for (int in_channel = 0; in_channel < filter_input_depth;
95
+ ++in_channel) {
96
+ int32_t input_val =
97
+ input_data[Offset(input_shape, batch, in_y, in_x,
98
+ in_channel + group * filter_input_depth)];
99
+ int32_t filter_val = filter_data[Offset(
100
+ filter_shape, out_channel, filter_y, filter_x, in_channel)];
101
+ // Accumulate with 32 bits accumulator.
102
+ // In the nudging process during model quantization, we force
103
+ // real value of 0.0 be represented by a quantized value. This
104
+ // guarantees that the input_offset is a int8_t, even though
105
+ // it is represented using int32_t. int32_t += int8_t *
106
+ // (int8_t - int8_t) so the highest value we can get from each
107
+ // accumulation is [-127, 127] * ([-128, 127] -
108
+ // [-128, 127]), which is [-32512, 32512]. log2(32512)
109
+ // = 14.98, which means we can accumulate at least 2^16
110
+ // multiplications without overflow. The accumulator is
111
+ // applied to a filter so the accumulation logic will hold as
112
+ // long as the filter size (filter_y * filter_x * in_channel)
113
+ // does not exceed 2^16, which is the case in all the models
114
+ // we have seen so far.
115
+ // TODO(b/174275578): Add a check to make sure the
116
+ // accumulator depth is smaller than 2^16.
117
+ acc += filter_val * (input_val + input_offset);
118
+ }
119
+ }
120
+ }
121
+
122
+ if (bias_data) {
123
+ acc += bias_data[out_channel];
124
+ }
125
+ acc = MultiplyByQuantizedMultiplier(
126
+ acc, output_multiplier[out_channel], output_shift[out_channel]);
127
+ acc += output_offset;
128
+ acc = std::max(acc, output_activation_min);
129
+ acc = std::min(acc, output_activation_max);
130
+ output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
131
+ static_cast<int8_t>(acc);
132
+ }
133
+ }
134
+ }
135
+ }
136
+ }
137
+
138
+
139
+ // Fixed-point per-channel-quantization convolution reference kernel.
140
+ // 16-bit data and 8-bit filter
141
+ template <typename AccumScalar>
142
+ inline void ConvPerChannel(
143
+ const ConvParams& params, const int32_t* output_multiplier,
144
+ const int32_t* output_shift, const RuntimeShape& input_shape,
145
+ const int16_t* input_data, const RuntimeShape& filter_shape,
146
+ const int8_t* filter_data, const RuntimeShape& bias_shape,
147
+ const AccumScalar* bias_data, const RuntimeShape& output_shape,
148
+ int16_t* output_data) {
149
+ // Get parameters.
150
+ const int stride_width = params.stride_width;
151
+ const int stride_height = params.stride_height;
152
+ const int dilation_width_factor = params.dilation_width_factor;
153
+ const int dilation_height_factor = params.dilation_height_factor;
154
+ const int pad_width = params.padding_values.width;
155
+ const int pad_height = params.padding_values.height;
156
+
157
+ // Set min and max value of the output.
158
+ const int32_t output_activation_min = params.quantized_activation_min;
159
+ const int32_t output_activation_max = params.quantized_activation_max;
160
+
161
+ // Consistency check.
162
+ TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
163
+ TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
164
+ TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
165
+ TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
166
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
167
+ const int input_depth = input_shape.Dims(3);
168
+ const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
169
+ if (bias_data) {
170
+ TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
171
+ }
172
+
173
+ // Check dimensions of the tensors.
174
+ const int input_height = input_shape.Dims(1);
175
+ const int input_width = input_shape.Dims(2);
176
+ const int filter_height = filter_shape.Dims(1);
177
+ const int filter_width = filter_shape.Dims(2);
178
+ const int filter_input_depth = filter_shape.Dims(3);
179
+ const int groups = input_depth / filter_input_depth;
180
+ TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
181
+ const int filters_per_group = output_depth / groups;
182
+ const int output_height = output_shape.Dims(1);
183
+ const int output_width = output_shape.Dims(2);
184
+ for (int batch = 0; batch < batches; ++batch) {
185
+ for (int out_y = 0; out_y < output_height; ++out_y) {
186
+ const int in_y_origin = (out_y * stride_height) - pad_height;
187
+ for (int out_x = 0; out_x < output_width; ++out_x) {
188
+ const int in_x_origin = (out_x * stride_width) - pad_width;
189
+ for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
190
+ auto group = out_channel / filters_per_group;
191
+ AccumScalar acc = 0;
192
+ for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
193
+ const int in_y = in_y_origin + dilation_height_factor * filter_y;
194
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
195
+ const int in_x = in_x_origin + dilation_width_factor * filter_x;
196
+
197
+ // Zero padding by omitting the areas outside the image.
198
+ const bool is_point_inside_image =
199
+ (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
200
+ (in_y < input_height);
201
+
202
+ if (!is_point_inside_image) {
203
+ continue;
204
+ }
205
+
206
+ for (int in_channel = 0; in_channel < filter_input_depth;
207
+ ++in_channel) {
208
+ int32_t input_val =
209
+ input_data[Offset(input_shape, batch, in_y, in_x,
210
+ in_channel + group * filter_input_depth)];
211
+ int32_t filter_val = filter_data[Offset(
212
+ filter_shape, out_channel, filter_y, filter_x, in_channel)];
213
+ // Accumulate with 64 bits accumulator.
214
+ // int64_t += int8_t * int16_t so the highest value we can
215
+ // get from each accumulation is [-127, 127] * ([-32768,
216
+ // 32767] -
217
+ // [-32768, 32767]), which is [-8322945, 8322945].
218
+ // log2(8322945) = 22.99.
219
+ acc += filter_val * input_val;
220
+ }
221
+ }
222
+ }
223
+ if (bias_data) {
224
+ acc += bias_data[out_channel];
225
+ }
226
+ int32_t scaled_acc = MultiplyByQuantizedMultiplier(
227
+ acc, output_multiplier[out_channel], output_shift[out_channel]);
228
+ scaled_acc = std::max(scaled_acc, output_activation_min);
229
+ scaled_acc = std::min(scaled_acc, output_activation_max);
230
+ output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
231
+ static_cast<int16_t>(scaled_acc);
232
+ }
233
+ }
234
+ }
235
+ }
236
+ }
237
+
238
+ } // namespace reference_integer_ops
239
+ } // namespace tflite_micro
240
+
241
+ #endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_