xmos-ai-tools 1.3.2.dev80__py3-none-macosx_10_15_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (395) hide show
  1. xmos_ai_tools/__init__.py +7 -0
  2. xmos_ai_tools/io_server/__init__.py +151 -0
  3. xmos_ai_tools/runtime/__init__.py +0 -0
  4. xmos_ai_tools/runtime/buildfiles/aitoolslib.cmake +13 -0
  5. xmos_ai_tools/runtime/buildfiles/aitoolslib.make +8 -0
  6. xmos_ai_tools/runtime/include/flash_server.h +74 -0
  7. xmos_ai_tools/runtime/include/flatbuffers/allocator.h +68 -0
  8. xmos_ai_tools/runtime/include/flatbuffers/array.h +243 -0
  9. xmos_ai_tools/runtime/include/flatbuffers/base.h +474 -0
  10. xmos_ai_tools/runtime/include/flatbuffers/bfbs_generator.h +43 -0
  11. xmos_ai_tools/runtime/include/flatbuffers/buffer.h +142 -0
  12. xmos_ai_tools/runtime/include/flatbuffers/buffer_ref.h +53 -0
  13. xmos_ai_tools/runtime/include/flatbuffers/code_generators.h +235 -0
  14. xmos_ai_tools/runtime/include/flatbuffers/default_allocator.h +64 -0
  15. xmos_ai_tools/runtime/include/flatbuffers/detached_buffer.h +114 -0
  16. xmos_ai_tools/runtime/include/flatbuffers/flatbuffer_builder.h +1197 -0
  17. xmos_ai_tools/runtime/include/flatbuffers/flatbuffers.h +270 -0
  18. xmos_ai_tools/runtime/include/flatbuffers/flatc.h +111 -0
  19. xmos_ai_tools/runtime/include/flatbuffers/flexbuffers.h +1897 -0
  20. xmos_ai_tools/runtime/include/flatbuffers/grpc.h +300 -0
  21. xmos_ai_tools/runtime/include/flatbuffers/hash.h +127 -0
  22. xmos_ai_tools/runtime/include/flatbuffers/idl.h +1232 -0
  23. xmos_ai_tools/runtime/include/flatbuffers/minireflect.h +419 -0
  24. xmos_ai_tools/runtime/include/flatbuffers/pch/flatc_pch.h +39 -0
  25. xmos_ai_tools/runtime/include/flatbuffers/pch/pch.h +38 -0
  26. xmos_ai_tools/runtime/include/flatbuffers/reflection.h +502 -0
  27. xmos_ai_tools/runtime/include/flatbuffers/reflection_generated.h +1449 -0
  28. xmos_ai_tools/runtime/include/flatbuffers/registry.h +128 -0
  29. xmos_ai_tools/runtime/include/flatbuffers/stl_emulation.h +509 -0
  30. xmos_ai_tools/runtime/include/flatbuffers/string.h +64 -0
  31. xmos_ai_tools/runtime/include/flatbuffers/struct.h +53 -0
  32. xmos_ai_tools/runtime/include/flatbuffers/table.h +168 -0
  33. xmos_ai_tools/runtime/include/flatbuffers/util.h +690 -0
  34. xmos_ai_tools/runtime/include/flatbuffers/vector.h +370 -0
  35. xmos_ai_tools/runtime/include/flatbuffers/vector_downward.h +271 -0
  36. xmos_ai_tools/runtime/include/flatbuffers/verifier.h +283 -0
  37. xmos_ai_tools/runtime/include/ioserver.h +44 -0
  38. xmos_ai_tools/runtime/include/lib_nn/api/TransposeConv.h +24 -0
  39. xmos_ai_tools/runtime/include/lib_nn/api/add_int16.h +27 -0
  40. xmos_ai_tools/runtime/include/lib_nn/api/add_int16_transform.h +42 -0
  41. xmos_ai_tools/runtime/include/lib_nn/api/dequantize_int16.h +22 -0
  42. xmos_ai_tools/runtime/include/lib_nn/api/dequantize_int16_transform.h +34 -0
  43. xmos_ai_tools/runtime/include/lib_nn/api/expand_8_to_16.h +8 -0
  44. xmos_ai_tools/runtime/include/lib_nn/api/multiply_int16.h +42 -0
  45. xmos_ai_tools/runtime/include/lib_nn/api/multiply_int16_transform.h +71 -0
  46. xmos_ai_tools/runtime/include/lib_nn/api/nn_api.h +15 -0
  47. xmos_ai_tools/runtime/include/lib_nn/api/nn_bin_types.h +14 -0
  48. xmos_ai_tools/runtime/include/lib_nn/api/nn_config.h +287 -0
  49. xmos_ai_tools/runtime/include/lib_nn/api/nn_conv2d_structs.h +72 -0
  50. xmos_ai_tools/runtime/include/lib_nn/api/nn_image.h +26 -0
  51. xmos_ai_tools/runtime/include/lib_nn/api/nn_layers.h +303 -0
  52. xmos_ai_tools/runtime/include/lib_nn/api/nn_op_helper.h +132 -0
  53. xmos_ai_tools/runtime/include/lib_nn/api/nn_op_utils.h +150 -0
  54. xmos_ai_tools/runtime/include/lib_nn/api/nn_operator.h +18 -0
  55. xmos_ai_tools/runtime/include/lib_nn/api/nn_pooling.h +551 -0
  56. xmos_ai_tools/runtime/include/lib_nn/api/nn_types.h +83 -0
  57. xmos_ai_tools/runtime/include/lib_nn/api/nn_window_params.h +55 -0
  58. xmos_ai_tools/runtime/include/lib_nn/api/output_transform_fn_int16.h +54 -0
  59. xmos_ai_tools/runtime/include/lib_nn/api/output_transform_fn_int16_kernel_transform.h +37 -0
  60. xmos_ai_tools/runtime/include/lib_nn/api/output_transform_fn_int16_mappings.h +13 -0
  61. xmos_ai_tools/runtime/include/lib_nn/api/quadratic_approximation.h +82 -0
  62. xmos_ai_tools/runtime/include/lib_nn/api/quadratic_interpolation.h +23 -0
  63. xmos_ai_tools/runtime/include/lib_nn/api/quantize_int16.h +22 -0
  64. xmos_ai_tools/runtime/include/lib_nn/api/quantize_int16_transform.h +33 -0
  65. xmos_ai_tools/runtime/include/lib_nn/api/version.h +13 -0
  66. xmos_ai_tools/runtime/include/lib_nn/api/vpu_memmove_word_aligned.h +15 -0
  67. xmos_ai_tools/runtime/include/lib_nn/api/vpu_memset_256.h +55 -0
  68. xmos_ai_tools/runtime/include/lib_nn/api/vpu_sim.h +118 -0
  69. xmos_ai_tools/runtime/include/lib_nn/api/xs3_vpu.h +216 -0
  70. xmos_ai_tools/runtime/include/lib_nn/api/xs3a_registers.h +2869 -0
  71. xmos_ai_tools/runtime/include/lib_nn/src/asm/asm_constants.h +41 -0
  72. xmos_ai_tools/runtime/include/lib_nn/src/asm/window_op_plan.h +25 -0
  73. xmos_ai_tools/runtime/include/lib_tflite_micro/api/fast_flash.h +47 -0
  74. xmos_ai_tools/runtime/include/lib_tflite_micro/api/inference_engine.h +218 -0
  75. xmos_ai_tools/runtime/include/lib_tflite_micro/api/memory_parallel_transport.h +52 -0
  76. xmos_ai_tools/runtime/include/lib_tflite_micro/api/version.h +13 -0
  77. xmos_ai_tools/runtime/include/lib_tflite_micro/api/xcore_config.h +17 -0
  78. xmos_ai_tools/runtime/include/lib_tflite_micro/api/xcore_device_memory.h +62 -0
  79. xmos_ai_tools/runtime/include/lib_tflite_micro/api/xcore_shared_config.h +31 -0
  80. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/conv2d_float.h +155 -0
  81. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_common.h +19 -0
  82. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_custom_options.h +28 -0
  83. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_error_reporter.h +32 -0
  84. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_interpreter.h +49 -0
  85. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_ops.h +71 -0
  86. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_profiler.h +49 -0
  87. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_utils.h +160 -0
  88. xmos_ai_tools/runtime/include/lib_tflite_micro/src/thread_call.h +119 -0
  89. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/legacy/usb_defs.h +4 -0
  90. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/legacy/usb_device.h +4 -0
  91. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/legacy/usb_std_descriptors.h +4 -0
  92. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/legacy/usb_std_requests.h +4 -0
  93. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/xud.h +518 -0
  94. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/xud_conf_default.h +11 -0
  95. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/xud_device.h +87 -0
  96. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/xud_std_descriptors.h +191 -0
  97. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/xud_std_requests.h +120 -0
  98. xmos_ai_tools/runtime/include/lib_xud/lib_xud/src/user/XUD_USB_Defines.h +70 -0
  99. xmos_ai_tools/runtime/include/lib_xud/lib_xud/src/user/class/hid.h +23 -0
  100. xmos_ai_tools/runtime/include/lib_xud/lib_xud/src/user/class/usbaudio10.h +30 -0
  101. xmos_ai_tools/runtime/include/lib_xud/lib_xud/src/user/class/usbaudio20.h +357 -0
  102. xmos_ai_tools/runtime/include/lib_xud/lib_xud/src/user/class/usbaudiocommon.h +168 -0
  103. xmos_ai_tools/runtime/include/signal/micro/kernels/delay_flexbuffers_generated_data.h +25 -0
  104. xmos_ai_tools/runtime/include/signal/micro/kernels/energy_flexbuffers_generated_data.h +28 -0
  105. xmos_ai_tools/runtime/include/signal/micro/kernels/fft_flexbuffers_generated_data.h +37 -0
  106. xmos_ai_tools/runtime/include/signal/micro/kernels/filter_bank_flexbuffers_generated_data.h +25 -0
  107. xmos_ai_tools/runtime/include/signal/micro/kernels/filter_bank_log_flexbuffers_generated_data.h +27 -0
  108. xmos_ai_tools/runtime/include/signal/micro/kernels/filter_bank_spectral_subtraction_flexbuffers_generated_data.h +26 -0
  109. xmos_ai_tools/runtime/include/signal/micro/kernels/framer_flexbuffers_generated_data.h +25 -0
  110. xmos_ai_tools/runtime/include/signal/micro/kernels/irfft.h +31 -0
  111. xmos_ai_tools/runtime/include/signal/micro/kernels/overlap_add_flexbuffers_generated_data.h +25 -0
  112. xmos_ai_tools/runtime/include/signal/micro/kernels/pcan_flexbuffers_generated_data.h +7 -0
  113. xmos_ai_tools/runtime/include/signal/micro/kernels/rfft.h +31 -0
  114. xmos_ai_tools/runtime/include/signal/micro/kernels/stacker_flexbuffers_generated_data.h +25 -0
  115. xmos_ai_tools/runtime/include/signal/micro/kernels/window_flexbuffers_generated_data.h +25 -0
  116. xmos_ai_tools/runtime/include/signal/src/circular_buffer.h +118 -0
  117. xmos_ai_tools/runtime/include/signal/src/complex.h +29 -0
  118. xmos_ai_tools/runtime/include/signal/src/energy.h +38 -0
  119. xmos_ai_tools/runtime/include/signal/src/fft_auto_scale.h +35 -0
  120. xmos_ai_tools/runtime/include/signal/src/filter_bank.h +69 -0
  121. xmos_ai_tools/runtime/include/signal/src/filter_bank_log.h +38 -0
  122. xmos_ai_tools/runtime/include/signal/src/filter_bank_spectral_subtraction.h +73 -0
  123. xmos_ai_tools/runtime/include/signal/src/filter_bank_square_root.h +34 -0
  124. xmos_ai_tools/runtime/include/signal/src/irfft.h +84 -0
  125. xmos_ai_tools/runtime/include/signal/src/kiss_fft_wrappers/kiss_fft_common.h +49 -0
  126. xmos_ai_tools/runtime/include/signal/src/kiss_fft_wrappers/kiss_fft_float.h +31 -0
  127. xmos_ai_tools/runtime/include/signal/src/kiss_fft_wrappers/kiss_fft_int16.h +30 -0
  128. xmos_ai_tools/runtime/include/signal/src/kiss_fft_wrappers/kiss_fft_int32.h +31 -0
  129. xmos_ai_tools/runtime/include/signal/src/log.h +30 -0
  130. xmos_ai_tools/runtime/include/signal/src/max_abs.h +31 -0
  131. xmos_ai_tools/runtime/include/signal/src/msb.h +32 -0
  132. xmos_ai_tools/runtime/include/signal/src/overlap_add.h +46 -0
  133. xmos_ai_tools/runtime/include/signal/src/pcan_argc_fixed.h +41 -0
  134. xmos_ai_tools/runtime/include/signal/src/rfft.h +85 -0
  135. xmos_ai_tools/runtime/include/signal/src/square_root.h +32 -0
  136. xmos_ai_tools/runtime/include/signal/src/window.h +31 -0
  137. xmos_ai_tools/runtime/include/signal/testdata/fft_test_data.h +48 -0
  138. xmos_ai_tools/runtime/include/tensorflow/lite/array.h +156 -0
  139. xmos_ai_tools/runtime/include/tensorflow/lite/builtin_op_data.h +22 -0
  140. xmos_ai_tools/runtime/include/tensorflow/lite/builtin_ops.h +241 -0
  141. xmos_ai_tools/runtime/include/tensorflow/lite/c/builtin_op_data.h +20 -0
  142. xmos_ai_tools/runtime/include/tensorflow/lite/c/c_api_types.h +26 -0
  143. xmos_ai_tools/runtime/include/tensorflow/lite/c/common.h +30 -0
  144. xmos_ai_tools/runtime/include/tensorflow/lite/context_util.h +54 -0
  145. xmos_ai_tools/runtime/include/tensorflow/lite/core/api/error_reporter.h +72 -0
  146. xmos_ai_tools/runtime/include/tensorflow/lite/core/api/flatbuffer_conversions.h +440 -0
  147. xmos_ai_tools/runtime/include/tensorflow/lite/core/api/tensor_utils.h +28 -0
  148. xmos_ai_tools/runtime/include/tensorflow/lite/core/c/builtin_op_data.h +626 -0
  149. xmos_ai_tools/runtime/include/tensorflow/lite/core/c/c_api_types.h +178 -0
  150. xmos_ai_tools/runtime/include/tensorflow/lite/core/c/common.h +1496 -0
  151. xmos_ai_tools/runtime/include/tensorflow/lite/core/macros.h +78 -0
  152. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/bits.h +102 -0
  153. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/fft.h +50 -0
  154. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/fft_io.h +34 -0
  155. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/fft_util.h +34 -0
  156. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/filterbank.h +63 -0
  157. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/filterbank_io.h +35 -0
  158. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/filterbank_util.h +50 -0
  159. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/frontend.h +64 -0
  160. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/frontend_io.h +31 -0
  161. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/frontend_util.h +52 -0
  162. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/kiss_fft_common.h +48 -0
  163. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/kiss_fft_int16.h +33 -0
  164. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/log_lut.h +40 -0
  165. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/log_scale.h +39 -0
  166. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/log_scale_io.h +33 -0
  167. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/log_scale_util.h +45 -0
  168. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h +46 -0
  169. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_io.h +36 -0
  170. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.h +50 -0
  171. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h +47 -0
  172. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.h +57 -0
  173. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/window.h +49 -0
  174. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/window_io.h +34 -0
  175. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/window_util.h +45 -0
  176. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/common.h +1358 -0
  177. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/compatibility.h +122 -0
  178. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/cppmath.h +40 -0
  179. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/max.h +35 -0
  180. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/min.h +35 -0
  181. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/optimized/neon_check.h +20 -0
  182. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/portable_tensor.h +141 -0
  183. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/portable_tensor_utils.h +623 -0
  184. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/quantization_util.h +292 -0
  185. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/add.h +561 -0
  186. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/add_n.h +86 -0
  187. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/arg_min_max.h +88 -0
  188. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/batch_matmul.h +275 -0
  189. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h +101 -0
  190. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/binary_function.h +91 -0
  191. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/broadcast_args.h +56 -0
  192. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/broadcast_to.h +97 -0
  193. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/ceil.h +37 -0
  194. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/comparisons.h +271 -0
  195. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/concatenation.h +141 -0
  196. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/conv.h +289 -0
  197. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/cumsum.h +175 -0
  198. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/depth_to_space.h +79 -0
  199. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h +100 -0
  200. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h +319 -0
  201. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/dequantize.h +78 -0
  202. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/div.h +247 -0
  203. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/elu.h +37 -0
  204. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/exp.h +38 -0
  205. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/fill.h +38 -0
  206. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/floor.h +39 -0
  207. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/floor_div.h +35 -0
  208. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/floor_mod.h +44 -0
  209. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/fully_connected.h +323 -0
  210. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/hard_swish.h +168 -0
  211. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/add.h +250 -0
  212. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h +241 -0
  213. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h +291 -0
  214. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h +126 -0
  215. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h +67 -0
  216. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h +121 -0
  217. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h +18 -0
  218. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h +194 -0
  219. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h +264 -0
  220. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h +117 -0
  221. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h +224 -0
  222. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/l2normalization.h +90 -0
  223. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/leaky_relu.h +69 -0
  224. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/log_softmax.h +256 -0
  225. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/logistic.h +132 -0
  226. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/lstm_cell.h +422 -0
  227. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/maximum_minimum.h +64 -0
  228. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/mul.h +267 -0
  229. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/neg.h +37 -0
  230. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/pad.h +169 -0
  231. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/pooling.h +303 -0
  232. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.h +333 -0
  233. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h +244 -0
  234. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/prelu.h +111 -0
  235. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h +140 -0
  236. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/quantize.h +89 -0
  237. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/reduce.h +491 -0
  238. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/requantize.h +70 -0
  239. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/resize_bilinear.h +233 -0
  240. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h +102 -0
  241. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/round.h +51 -0
  242. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/select.h +151 -0
  243. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/slice.h +80 -0
  244. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/softmax.h +233 -0
  245. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h +109 -0
  246. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/space_to_depth.h +80 -0
  247. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/strided_slice.h +147 -0
  248. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/sub.h +465 -0
  249. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/tanh.h +129 -0
  250. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/transpose.h +203 -0
  251. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/transpose_conv.h +225 -0
  252. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/runtime_shape.h +168 -0
  253. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/strided_slice_logic.h +278 -0
  254. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/tensor_ctypes.h +42 -0
  255. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/types.h +1096 -0
  256. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/kernel_util.h +341 -0
  257. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/op_macros.h +49 -0
  258. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/padding.h +115 -0
  259. xmos_ai_tools/runtime/include/tensorflow/lite/micro/arena_allocator/ibuffer_allocator.h +100 -0
  260. xmos_ai_tools/runtime/include/tensorflow/lite/micro/arena_allocator/non_persistent_arena_buffer_allocator.h +104 -0
  261. xmos_ai_tools/runtime/include/tensorflow/lite/micro/arena_allocator/persistent_arena_buffer_allocator.h +58 -0
  262. xmos_ai_tools/runtime/include/tensorflow/lite/micro/arena_allocator/recording_single_arena_buffer_allocator.h +63 -0
  263. xmos_ai_tools/runtime/include/tensorflow/lite/micro/arena_allocator/single_arena_buffer_allocator.h +144 -0
  264. xmos_ai_tools/runtime/include/tensorflow/lite/micro/benchmarks/micro_benchmark.h +95 -0
  265. xmos_ai_tools/runtime/include/tensorflow/lite/micro/compatibility.h +32 -0
  266. xmos_ai_tools/runtime/include/tensorflow/lite/micro/cortex_m_generic/debug_log_callback.h +49 -0
  267. xmos_ai_tools/runtime/include/tensorflow/lite/micro/debug_log.h +38 -0
  268. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/micro_speech/micro_model_settings.h +37 -0
  269. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/network_tester/expected_output_data.h +47 -0
  270. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/network_tester/input_data.h +108 -0
  271. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/network_tester/network_model.h +166 -0
  272. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/person_detection/detection_responder.h +32 -0
  273. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/person_detection/image_provider.h +38 -0
  274. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/person_detection/main_functions.h +37 -0
  275. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/person_detection/model_settings.h +35 -0
  276. xmos_ai_tools/runtime/include/tensorflow/lite/micro/fake_micro_context.h +70 -0
  277. xmos_ai_tools/runtime/include/tensorflow/lite/micro/flatbuffer_utils.h +65 -0
  278. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/activation_utils.h +57 -0
  279. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/activations.h +64 -0
  280. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/add.h +78 -0
  281. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/mli_function_specializations.h +141 -0
  282. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/mli_interface.h +75 -0
  283. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/mli_slicers.h +56 -0
  284. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/mli_tf_utils.h +310 -0
  285. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.h +145 -0
  286. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.h +78 -0
  287. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/ceva/ceva_common.h +24 -0
  288. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/ceva/ceva_tflm_lib.h +613 -0
  289. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/ceva/mcps_macros.h +115 -0
  290. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/ceva/types.h +1286 -0
  291. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/circular_buffer.h +45 -0
  292. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/circular_buffer_flexbuffers_generated_data.h +22 -0
  293. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/conv.h +117 -0
  294. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/conv_test.h +94 -0
  295. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/depthwise_conv.h +80 -0
  296. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/dequantize.h +38 -0
  297. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/detection_postprocess_flexbuffers_generated_data.h +25 -0
  298. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/ethosu.h +28 -0
  299. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/fully_connected.h +112 -0
  300. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/hard_swish.h +30 -0
  301. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/kernel_runner.h +86 -0
  302. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/kernel_util.h +150 -0
  303. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/leaky_relu.h +43 -0
  304. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/logical.h +35 -0
  305. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/logistic.h +42 -0
  306. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/lstm_eval.h +541 -0
  307. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/lstm_eval_test.h +817 -0
  308. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/lstm_shared.h +150 -0
  309. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/micro_ops.h +158 -0
  310. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/micro_tensor_utils.h +56 -0
  311. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/mul.h +74 -0
  312. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/pad.h +27 -0
  313. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/pooling.h +142 -0
  314. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/prelu.h +39 -0
  315. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/quantize.h +37 -0
  316. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/reduce.h +65 -0
  317. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/reshape.h +26 -0
  318. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/softmax.h +67 -0
  319. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/strided_slice.h +40 -0
  320. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/sub.h +60 -0
  321. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/svdf.h +100 -0
  322. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/testdata/conv_test_data.h +37 -0
  323. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/testdata/lstm_test_data.h +579 -0
  324. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.h +47 -0
  325. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/hifimini/fixedpoint_utils.h +139 -0
  326. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/lstm_eval.h +216 -0
  327. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/lstm_shared.h +78 -0
  328. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa.h +38 -0
  329. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_add.h +48 -0
  330. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_conv.h +89 -0
  331. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_depthwise_conv.h +74 -0
  332. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_fully_connected.h +78 -0
  333. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_pad.h +49 -0
  334. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_pooling.h +76 -0
  335. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_reduce.h +47 -0
  336. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_reshape.h +44 -0
  337. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_softmax.h +58 -0
  338. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_svdf.h +39 -0
  339. xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_helpers.h +64 -0
  340. xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h +170 -0
  341. xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/linear_memory_planner.h +53 -0
  342. xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/memory_plan_struct.h +73 -0
  343. xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/micro_memory_planner.h +95 -0
  344. xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/non_persistent_buffer_planner_shim.h +133 -0
  345. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_allocation_info.h +138 -0
  346. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_allocator.h +351 -0
  347. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_arena_constants.h +28 -0
  348. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_common.h +38 -0
  349. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_context.h +176 -0
  350. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_graph.h +79 -0
  351. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_interpreter.h +189 -0
  352. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_interpreter_context.h +125 -0
  353. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_interpreter_graph.h +110 -0
  354. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_log.h +42 -0
  355. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_mutable_op_resolver.h +708 -0
  356. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_op_resolver.h +62 -0
  357. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_profiler.h +140 -0
  358. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_profiler_interface.h +38 -0
  359. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_resource_variable.h +89 -0
  360. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_time.h +36 -0
  361. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_utils.h +162 -0
  362. xmos_ai_tools/runtime/include/tensorflow/lite/micro/mock_micro_graph.h +60 -0
  363. xmos_ai_tools/runtime/include/tensorflow/lite/micro/python/interpreter/src/python_ops_resolver.h +21 -0
  364. xmos_ai_tools/runtime/include/tensorflow/lite/micro/python/tflite_size/src/flatbuffer_size.h +30 -0
  365. xmos_ai_tools/runtime/include/tensorflow/lite/micro/python/tflite_size/src/flatbuffer_size_wrapper.h +33 -0
  366. xmos_ai_tools/runtime/include/tensorflow/lite/micro/recording_micro_allocator.h +125 -0
  367. xmos_ai_tools/runtime/include/tensorflow/lite/micro/recording_micro_interpreter.h +69 -0
  368. xmos_ai_tools/runtime/include/tensorflow/lite/micro/system_setup.h +27 -0
  369. xmos_ai_tools/runtime/include/tensorflow/lite/micro/test_helper_custom_ops.h +49 -0
  370. xmos_ai_tools/runtime/include/tensorflow/lite/micro/test_helpers.h +334 -0
  371. xmos_ai_tools/runtime/include/tensorflow/lite/micro/testing/micro_test.h +267 -0
  372. xmos_ai_tools/runtime/include/tensorflow/lite/micro/testing/test_conv_model.h +23 -0
  373. xmos_ai_tools/runtime/include/tensorflow/lite/micro/tflite_bridge/flatbuffer_conversions_bridge.h +45 -0
  374. xmos_ai_tools/runtime/include/tensorflow/lite/micro/tflite_bridge/micro_error_reporter.h +36 -0
  375. xmos_ai_tools/runtime/include/tensorflow/lite/micro/tools/benchmarking/log_utils.h +273 -0
  376. xmos_ai_tools/runtime/include/tensorflow/lite/micro/tools/benchmarking/metrics.h +41 -0
  377. xmos_ai_tools/runtime/include/tensorflow/lite/micro/tools/benchmarking/op_resolver.h +127 -0
  378. xmos_ai_tools/runtime/include/tensorflow/lite/portable_type_to_tflitetype.h +75 -0
  379. xmos_ai_tools/runtime/include/tensorflow/lite/schema/schema_generated.h +24644 -0
  380. xmos_ai_tools/runtime/include/tensorflow/lite/schema/schema_utils.h +33 -0
  381. xmos_ai_tools/runtime/include/tile_ram_server.h +38 -0
  382. xmos_ai_tools/runtime/lib/libhost_xtflitemicro.a +0 -0
  383. xmos_ai_tools/runtime/lib/libxtflitemicro.a +0 -0
  384. xmos_ai_tools/xformer/__init__.py +60 -0
  385. xmos_ai_tools/xformer/flash.py +190 -0
  386. xmos_ai_tools/xinterpreters/__init__.py +1 -0
  387. xmos_ai_tools/xinterpreters/exceptions.py +38 -0
  388. xmos_ai_tools/xinterpreters/host_interpreter.py +652 -0
  389. xmos_ai_tools/xinterpreters/libs/macos/xtflm_python.1.0.1.dylib +0 -0
  390. xmos_ai_tools/xinterpreters/libs/macos/xtflm_python.dylib +0 -0
  391. xmos_ai_tools-1.3.2.dev80.data/data/bin/xcore-opt +0 -0
  392. xmos_ai_tools-1.3.2.dev80.dist-info/METADATA +33 -0
  393. xmos_ai_tools-1.3.2.dev80.dist-info/RECORD +395 -0
  394. xmos_ai_tools-1.3.2.dev80.dist-info/WHEEL +5 -0
  395. xmos_ai_tools-1.3.2.dev80.dist-info/top_level.txt +1 -0
@@ -0,0 +1,319 @@
1
+ /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+ #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
16
+ #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
17
+
18
+ #include <algorithm>
19
+
20
+ #include "fixedpoint/fixedpoint.h"
21
+ #include "tensorflow/lite/kernels/internal/common.h"
22
+ #include "tensorflow/lite/kernels/internal/compatibility.h"
23
+ #include "tensorflow/lite/kernels/internal/types.h"
24
+
25
+ namespace tflite_micro {
26
+
27
+ // Used in tests and template parameters to control which version of depthwise
28
+ // convolution is called. Primarily for reference code, and specializations
29
+ // forced in tests.
30
+ enum class DepthwiseConvImplementation {
31
+ // Run all tests against kNone (the standard entry) even if also testing another
32
+ // kernel, since we need to be sure that the main DepthwiseConv() function in
33
+ // optimized_ops.h dispatches to a correctly-executing kernel.
34
+ kNone = 0, // The "default" option: use the normal
35
+ // DepthwiseConv kernel (entry) function.
36
+ kUseGenericKernel, // Forced use of generic kernel.
37
+ kUseNeon3x3, // 3x3 kernel that uses NEON when available.
38
+ kUseNeon3x3DotProduct, // 3x3 kernel that uses dot-product enabled NEON
39
+ // when available.
40
+ kUseCModel3x3DotProduct, // 3x3 kernel, reference C model that is intended
41
+ // to match the overall design of the NEON code.
42
+ kUseUnwound3x3DotProduct, // 3x3 kernel, reference C model with unwound loops
43
+ // and some arrays.
44
+ kUseIntrinsics3x3DotProduct, // 3x3 kernel using NEON intrinsics.
45
+ };
46
+
47
+ // Category of depthwise convolution output rounding.
48
+ enum class DepthwiseConvOutputRounding {
49
+ kNone = 0, // Invalid: specific method must be specified.
50
+ kAwayFromZero, // Original method: exact halves rounded away from zero.
51
+ kUpward, // Halves towards +infinity: adds 0.5 before truncate.
52
+ // This is where a future kNearestEven would be placed.
53
+ };
54
+
55
+ // Category of depthwise convolution depth multiplication.
56
+ enum class DepthwiseConvDepthMultiplication {
57
+ kNoMultiplication = 0, // Depth multiplier = 1.
58
+ kUnitInputDepth, // Input depth = 1, output depth = depth multiplier.
59
+ };
60
+
61
+ namespace reference_ops {
62
+ namespace depthwise_conv {
63
+
64
+ template <DepthwiseConvOutputRounding output_rounding>
65
+ inline int32_t DepthwiseConvRound(int32_t x, int32_t quantized_multiplier,
66
+ int shift) {
67
+ TFLITE_DCHECK_NE(output_rounding, DepthwiseConvOutputRounding::kNone);
68
+ return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
69
+ }
70
+
71
+ // Single-rounding MultiplyByQuantizedMultiplier
72
+ #if TFLITE_SINGLE_ROUNDING
73
+ template <>
74
+ inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kAwayFromZero>(
75
+ int32_t x, int32_t quantized_multiplier, int shift) {
76
+ using gemmlowp::RoundingDivideByPOT;
77
+ using gemmlowp::SaturatingRoundingDoublingHighMul;
78
+ int left_shift = shift > 0 ? shift : 0;
79
+ int right_shift = shift > 0 ? 0 : -shift;
80
+ return RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(
81
+ x * (1 << left_shift), quantized_multiplier),
82
+ right_shift);
83
+ }
84
+
85
+ template <>
86
+ inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kUpward>(
87
+ int32_t x, int32_t quantized_multiplier, int shift) {
88
+ return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
89
+ }
90
+ // Double-rounding MultiplyByQuantizedMultiplier
91
+ #else
92
+ template <>
93
+ inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kAwayFromZero>(
94
+ int32_t x, int32_t quantized_multiplier, int shift) {
95
+ return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
96
+ }
97
+
98
+ template <>
99
+ inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kUpward>(
100
+ int32_t x, int32_t quantized_multiplier, int shift) {
101
+ using gemmlowp::SaturatingRoundingDoublingHighMul;
102
+ const int left_shift = shift > 0 ? shift : 0;
103
+ const int right_shift = shift > 0 ? 0 : -shift;
104
+ const int rounding_offset = right_shift > 0 ? 1 << (right_shift - 1) : 0;
105
+ return (SaturatingRoundingDoublingHighMul(x * (1 << left_shift),
106
+ quantized_multiplier) +
107
+ rounding_offset) >>
108
+ right_shift;
109
+ }
110
+ #endif // TFLITE_SINGLE_ROUNDING
111
+
112
+ template <DepthwiseConvOutputRounding output_rounding>
113
+ struct DepthwiseConvBasicKernel {
114
+ static inline void Run(
115
+ const DepthwiseParams& params, const RuntimeShape& input_shape,
116
+ const uint8_t* input_data, const RuntimeShape& filter_shape,
117
+ const uint8_t* filter_data, const RuntimeShape& bias_shape,
118
+ const int32_t* bias_data, const RuntimeShape& output_shape,
119
+ uint8_t* output_data) {
120
+ const int stride_width = params.stride_width;
121
+ const int stride_height = params.stride_height;
122
+ const int dilation_width_factor = params.dilation_width_factor;
123
+ const int dilation_height_factor = params.dilation_height_factor;
124
+ const int pad_width = params.padding_values.width;
125
+ const int pad_height = params.padding_values.height;
126
+ const int depth_multiplier = params.depth_multiplier;
127
+ const int32_t output_activation_min = params.quantized_activation_min;
128
+ const int32_t output_activation_max = params.quantized_activation_max;
129
+ const int32_t input_offset = params.input_offset;
130
+ const int32_t filter_offset = params.weights_offset;
131
+ const int32_t output_offset = params.output_offset;
132
+ const int32_t output_multiplier = params.output_multiplier;
133
+ const int output_shift = params.output_shift;
134
+ TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
135
+ TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
136
+ TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
137
+
138
+ TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
139
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
140
+ const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
141
+ const int input_height = input_shape.Dims(1);
142
+ const int input_width = input_shape.Dims(2);
143
+ const int input_depth = input_shape.Dims(3);
144
+ const int filter_height = filter_shape.Dims(1);
145
+ const int filter_width = filter_shape.Dims(2);
146
+ const int output_height = output_shape.Dims(1);
147
+ const int output_width = output_shape.Dims(2);
148
+ TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
149
+ TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
150
+
151
+ for (int b = 0; b < batches; ++b) {
152
+ for (int out_y = 0; out_y < output_height; ++out_y) {
153
+ for (int out_x = 0; out_x < output_width; ++out_x) {
154
+ for (int ic = 0; ic < input_depth; ++ic) {
155
+ for (int m = 0; m < depth_multiplier; m++) {
156
+ const int oc = m + ic * depth_multiplier;
157
+ const int in_x_origin = (out_x * stride_width) - pad_width;
158
+ const int in_y_origin = (out_y * stride_height) - pad_height;
159
+ int32_t acc = 0;
160
+ for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
161
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
162
+ const int in_x =
163
+ in_x_origin + dilation_width_factor * filter_x;
164
+ const int in_y =
165
+ in_y_origin + dilation_height_factor * filter_y;
166
+ // If the location is outside the bounds of the input image,
167
+ // use zero as a default value.
168
+ if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
169
+ (in_y < input_height)) {
170
+ int32_t input_val =
171
+ input_data[Offset(input_shape, b, in_y, in_x, ic)];
172
+ int32_t filter_val = filter_data[Offset(
173
+ filter_shape, 0, filter_y, filter_x, oc)];
174
+ acc += (filter_val + filter_offset) *
175
+ (input_val + input_offset);
176
+ }
177
+ }
178
+ }
179
+ if (bias_data) {
180
+ acc += bias_data[oc];
181
+ }
182
+ acc = DepthwiseConvRound<output_rounding>(acc, output_multiplier,
183
+ output_shift);
184
+ acc += output_offset;
185
+ acc = std::max(acc, output_activation_min);
186
+ acc = std::min(acc, output_activation_max);
187
+ output_data[Offset(output_shape, b, out_y, out_x, oc)] =
188
+ static_cast<uint8_t>(acc);
189
+ }
190
+ }
191
+ }
192
+ }
193
+ }
194
+ }
195
+
196
+ // TODO(b/148596273): Reconcile reference versions, perhaps with common
197
+ // MultiplyByQuantizedMultiplier or DepthwiseConvRound function.
198
+ static inline void RunPerChannel(
199
+ const DepthwiseParams& params, const RuntimeShape& input_shape,
200
+ const int8_t* input_data, const RuntimeShape& filter_shape,
201
+ const int8_t* filter_data, const RuntimeShape& bias_shape,
202
+ const int32_t* bias_data, const RuntimeShape& output_shape,
203
+ int8_t* output_data) {
204
+ // Get parameters.
205
+ // TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro.
206
+ const int stride_width = params.stride_width;
207
+ const int stride_height = params.stride_height;
208
+ const int dilation_width_factor = params.dilation_width_factor;
209
+ const int dilation_height_factor = params.dilation_height_factor;
210
+ const int pad_width = params.padding_values.width;
211
+ const int pad_height = params.padding_values.height;
212
+ const int depth_multiplier = params.depth_multiplier;
213
+ const int32_t input_offset = params.input_offset;
214
+ const int32_t output_offset = params.output_offset;
215
+ const int32_t output_activation_min = params.quantized_activation_min;
216
+ const int32_t output_activation_max = params.quantized_activation_max;
217
+ const int32_t* output_multiplier = params.output_multiplier_per_channel;
218
+ const int32_t* output_shift = params.output_shift_per_channel;
219
+
220
+ // Check dimensions of the tensors.
221
+ TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
222
+ TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
223
+ TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
224
+
225
+ TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
226
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
227
+ const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
228
+ const int input_height = input_shape.Dims(1);
229
+ const int input_width = input_shape.Dims(2);
230
+ const int input_depth = input_shape.Dims(3);
231
+ const int filter_height = filter_shape.Dims(1);
232
+ const int filter_width = filter_shape.Dims(2);
233
+ const int output_height = output_shape.Dims(1);
234
+ const int output_width = output_shape.Dims(2);
235
+ TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
236
+ TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
237
+
238
+ for (int batch = 0; batch < batches; ++batch) {
239
+ for (int out_y = 0; out_y < output_height; ++out_y) {
240
+ for (int out_x = 0; out_x < output_width; ++out_x) {
241
+ for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
242
+ for (int m = 0; m < depth_multiplier; ++m) {
243
+ const int output_channel = m + in_channel * depth_multiplier;
244
+ const int in_x_origin = (out_x * stride_width) - pad_width;
245
+ const int in_y_origin = (out_y * stride_height) - pad_height;
246
+ int32_t acc = 0;
247
+ for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
248
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
249
+ const int in_x =
250
+ in_x_origin + dilation_width_factor * filter_x;
251
+ const int in_y =
252
+ in_y_origin + dilation_height_factor * filter_y;
253
+ // Zero padding by omitting the areas outside the image.
254
+ const bool is_point_inside_image =
255
+ (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
256
+ (in_y < input_height);
257
+ if (is_point_inside_image) {
258
+ int32_t input_val = input_data[Offset(
259
+ input_shape, batch, in_y, in_x, in_channel)];
260
+ int32_t filter_val = filter_data[Offset(
261
+ filter_shape, 0, filter_y, filter_x, output_channel)];
262
+ // Accumulate with 32 bits accumulator.
263
+ // In the nudging process during model quantization, we
264
+ // force the real value 0.0 to be represented by a quantized
265
+ // value. This guarantees that the input_offset is an int8_t,
266
+ // even though it is represented using int32_t. Each step is
267
+ // int32_t += int8_t * (int8_t - int8_t),
268
+ // so the highest value we can get from
269
+ // each accumulation is [-127, 127] * ([-128, 127] -
270
+ // [-128, 127]), which is [-32512, 32512]. log2(32512)
271
+ // = 14.98, which means we can accumulate at least 2^16
272
+ // multiplications without overflow. The accumulator is
273
+ // applied to a filter so the accumulation logic will hold
274
+ // as long as the filter size (filter_y * filter_x *
275
+ // in_channel) does not exceed 2^16, which is the case in
276
+ // all the models we have seen so far.
277
+ acc += filter_val * (input_val + input_offset);
278
+ }
279
+ }
280
+ }
281
+ if (bias_data) {
282
+ acc += bias_data[output_channel];
283
+ }
284
+ acc = DepthwiseConvRound<output_rounding>(
285
+ acc, output_multiplier[output_channel],
286
+ output_shift[output_channel]);
287
+ acc += output_offset;
288
+ acc = std::max(acc, output_activation_min);
289
+ acc = std::min(acc, output_activation_max);
290
+ output_data[Offset(output_shape, batch, out_y, out_x,
291
+ output_channel)] = static_cast<int8_t>(acc);
292
+ }
293
+ }
294
+ }
295
+ }
296
+ }
297
+ }
298
+ };
299
+
300
+ } // namespace depthwise_conv
301
+
302
+ inline void DepthwiseConv(
303
+ const DepthwiseParams& params, const RuntimeShape& input_shape,
304
+ const uint8_t* input_data, const RuntimeShape& filter_shape,
305
+ const uint8_t* filter_data, const RuntimeShape& bias_shape,
306
+ const int32_t* bias_data, const RuntimeShape& output_shape,
307
+ uint8_t* output_data) {
308
+ return depthwise_conv::DepthwiseConvBasicKernel<
309
+ DepthwiseConvOutputRounding::kAwayFromZero>::Run(params, input_shape,
310
+ input_data, filter_shape,
311
+ filter_data, bias_shape,
312
+ bias_data, output_shape,
313
+ output_data);
314
+ }
315
+
316
+ } // namespace reference_ops
317
+ } // namespace tflite_micro
318
+
319
+ #endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
@@ -0,0 +1,78 @@
1
+ /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+ #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_
16
+ #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_
17
+
18
+ #include <limits.h>
19
+
20
+ #include <vector>
21
+
22
+ #include "tensorflow/lite/kernels/internal/common.h"
23
+ #include "tensorflow/lite/kernels/internal/types.h"
24
+
25
+ namespace tflite_micro {
26
+
27
+ namespace reference_ops {
28
+
29
+ // Dequantizes into a float without rounding.
30
+ template <typename InputT, typename OutputT>
31
+ inline void Dequantize(const tflite_micro::DequantizationParams& op_params,
32
+ const RuntimeShape& input_shape,
33
+ const InputT* input_data,
34
+ const RuntimeShape& output_shape, OutputT* output_data) {
35
+ int32_t zero_point = op_params.zero_point;
36
+ const double scale = op_params.scale;
37
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
38
+
39
+ for (int i = 0; i < flat_size; i++) {
40
+ const int32_t val = input_data[i];
41
+ const OutputT result = static_cast<OutputT>(scale * (val - zero_point));
42
+ output_data[i] = result;
43
+ }
44
+ }
45
+
46
+ // Dequantizes per-channel quantized tensor to float.
47
+ template <typename T>
48
+ inline void PerChannelDequantize(
49
+ const tflite_micro::PerChannelDequantizationParams& op_params,
50
+ const RuntimeShape& input_shape, const T* input_data,
51
+ const RuntimeShape& output_shape, float* output_data) {
52
+ // Ensure flat size is same.
53
+ MatchingFlatSize(input_shape, output_shape);
54
+
55
+ const int32_t* zero_point = op_params.zero_point;
56
+ const float* scale = op_params.scale;
57
+ const int32_t quantized_dimension = op_params.quantized_dimension;
58
+ const int32_t num_dims = input_shape.DimensionsCount();
59
+ const int32_t* dims_data = input_shape.DimsData();
60
+ std::vector<int> current_dim(num_dims, 0);
61
+
62
+ do {
63
+ size_t offset =
64
+ ReducedOutputOffset(num_dims, reinterpret_cast<const int*>(dims_data),
65
+ current_dim.data(), 0, nullptr);
66
+ const int channel = current_dim[quantized_dimension];
67
+ const int32_t val = input_data[offset];
68
+ const float result =
69
+ static_cast<float>(scale[channel] * (val - zero_point[channel]));
70
+ output_data[offset] = result;
71
+ } while (NextIndex(num_dims, reinterpret_cast<const int*>(dims_data),
72
+ current_dim.data()));
73
+ }
74
+
75
+ } // namespace reference_ops
76
+
77
+ } // namespace tflite_micro
78
+ #endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_
@@ -0,0 +1,247 @@
1
+ /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+ #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DIV_H_
16
+ #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DIV_H_
17
+
18
+ #include <algorithm>
19
+
20
+ #include "tensorflow/lite/kernels/internal/common.h"
21
+
22
+ namespace tflite_micro {
23
+
24
+ namespace reference_ops {
25
+
26
+ template <typename T>
27
+ inline void DivCheckArithmeticParams(const ArithmeticParams& params) {
28
+ TFLITE_DCHECK_LE(params.quantized_activation_min,
29
+ params.quantized_activation_max);
30
+ // Input offset is the negated input zero point. Activation tensors are
31
+ // asymmetrically quantized, so they span the full range of T.
32
+ constexpr int32_t max_value =
33
+ static_cast<int32_t>(std::numeric_limits<T>::max());
34
+ TFLITE_DCHECK_GE(params.input1_offset, -max_value);
35
+ TFLITE_DCHECK_LE(params.input1_offset, max_value);
36
+ TFLITE_DCHECK_GE(params.input2_offset, -max_value);
37
+ TFLITE_DCHECK_LE(params.input2_offset, max_value);
38
+ TFLITE_DCHECK_GE(params.output_offset, -max_value);
39
+ TFLITE_DCHECK_LE(params.output_offset, max_value);
40
+ }
41
+
42
+ // Element-wise div that can often be used for inner loop of broadcast Div as
43
+ // well as the non-broadcast Div.
44
+ template <typename T>
45
+ inline void DivElementwise(int size, const ArithmeticParams& params,
46
+ const T* input1_data, const T* input2_data,
47
+ T* output_data) {
48
+ DivCheckArithmeticParams<T>(params);
49
+
50
+ for (int i = 0; i < size; ++i) {
51
+ int32_t input1_val = params.input1_offset + input1_data[i];
52
+ int32_t input2_val = params.input2_offset + input2_data[i];
53
+ TFLITE_DCHECK_NE(input2_val, 0);
54
+ if (input2_val < 0) {
55
+ // Invert signs to avoid a negative input2_val as input2_inv needs to be
56
+ // positive to be used as multiplier of MultiplyByQuantizedMultiplier.
57
+ input1_val = -input1_val;
58
+ input2_val = -input2_val;
59
+ }
60
+ int recip_shift;
61
+ const int32_t input2_inv = GetReciprocal(input2_val, 31, &recip_shift);
62
+ const int headroom = CountLeadingSignBits(input1_val);
63
+ const int32_t unscaled_quotient =
64
+ MultiplyByQuantizedMultiplierGreaterThanOne(input1_val, input2_inv,
65
+ headroom);
66
+ const int total_shift = params.output_shift - recip_shift - headroom;
67
+ const int32_t unclamped_result =
68
+ params.output_offset +
69
+ MultiplyByQuantizedMultiplierSmallerThanOneExp(
70
+ unscaled_quotient, params.output_multiplier, total_shift);
71
+ const int32_t clamped_output =
72
+ std::min(params.quantized_activation_max,
73
+ std::max(params.quantized_activation_min, unclamped_result));
74
+ output_data[i] = static_cast<T>(clamped_output);
75
+ }
76
+ }
77
+
78
+ inline void Div(const ArithmeticParams& params,
79
+ const RuntimeShape& input1_shape, const uint8_t* input1_data,
80
+ const RuntimeShape& input2_shape, const uint8_t* input2_data,
81
+ const RuntimeShape& output_shape, uint8_t* output_data) {
82
+ TFLITE_DCHECK_LE(params.quantized_activation_min,
83
+ params.quantized_activation_max);
84
+ const int flat_size =
85
+ MatchingElementsSize(input1_shape, input2_shape, output_shape);
86
+
87
+ DivElementwise(flat_size, params, input1_data, input2_data, output_data);
88
+ }
89
+
90
+ inline void Div(const ArithmeticParams& params,
91
+ const RuntimeShape& input1_shape, const int8_t* input1_data,
92
+ const RuntimeShape& input2_shape, const int8_t* input2_data,
93
+ const RuntimeShape& output_shape, int8_t* output_data) {
94
+ TFLITE_DCHECK_LE(params.quantized_activation_min,
95
+ params.quantized_activation_max);
96
+ const int flat_size =
97
+ MatchingElementsSize(input1_shape, input2_shape, output_shape);
98
+
99
+ DivElementwise(flat_size, params, input1_data, input2_data, output_data);
100
+ }
101
+
102
+ template <typename T, int N = 5>
103
+ inline void BroadcastDivSlowQuantized(
104
+ const ArithmeticParams& params, const RuntimeShape& unextended_input1_shape,
105
+ const T* input1_data, const RuntimeShape& unextended_input2_shape,
106
+ const T* input2_data, const RuntimeShape& unextended_output_shape,
107
+ T* output_data) {
108
+ TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), N);
109
+ TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), N);
110
+ TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), N);
111
+
112
+ NdArrayDesc<N> desc1;
113
+ NdArrayDesc<N> desc2;
114
+ NdArrayDesc<N> output_desc;
115
+ NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
116
+ unextended_input2_shape, &desc1, &desc2);
117
+ CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
118
+ &output_desc);
119
+
120
+ DivCheckArithmeticParams<T>(params);
121
+
122
+ auto div_func = [&](int indexes[N]) {
123
+ int32_t input1_val =
124
+ params.input1_offset + input1_data[SubscriptToIndex(desc1, indexes)];
125
+ int32_t input2_val =
126
+ params.input2_offset + input2_data[SubscriptToIndex(desc2, indexes)];
127
+ TFLITE_DCHECK_NE(input2_val, 0);
128
+ if (input2_val < 0) {
129
+ // Invert signs to avoid a negative input2_val as input2_inv needs to be
130
+ // positive to be used as multiplier of MultiplyByQuantizedMultiplier.
131
+ input1_val = -input1_val;
132
+ input2_val = -input2_val;
133
+ }
134
+ int recip_shift;
135
+ const int32_t input2_inv = GetReciprocal(input2_val, 31, &recip_shift);
136
+ const int headroom = CountLeadingSignBits(input1_val);
137
+ const int32_t unscaled_quotient =
138
+ MultiplyByQuantizedMultiplierGreaterThanOne(input1_val, input2_inv,
139
+ headroom);
140
+ const int total_shift = params.output_shift - recip_shift - headroom;
141
+ const int32_t unclamped_result =
142
+ params.output_offset +
143
+ MultiplyByQuantizedMultiplierSmallerThanOneExp(
144
+ unscaled_quotient, params.output_multiplier, total_shift);
145
+ const int32_t clamped_output =
146
+ std::min(params.quantized_activation_max,
147
+ std::max(params.quantized_activation_min, unclamped_result));
148
+ output_data[SubscriptToIndex(output_desc, indexes)] =
149
+ static_cast<T>(clamped_output);
150
+ };
151
+ NDOpsHelper<N>(output_desc, div_func);
152
+ }
153
+
154
+ template <int N = 5>
155
+ inline void BroadcastDivSlow(const ArithmeticParams& params,
156
+ const RuntimeShape& unextended_input1_shape,
157
+ const uint8_t* input1_data,
158
+ const RuntimeShape& unextended_input2_shape,
159
+ const uint8_t* input2_data,
160
+ const RuntimeShape& unextended_output_shape,
161
+ uint8_t* output_data) {
162
+ BroadcastDivSlowQuantized<uint8_t, N>(
163
+ params, unextended_input1_shape, input1_data, unextended_input2_shape,
164
+ input2_data, unextended_output_shape, output_data);
165
+ }
166
+
167
+ template <int N = 5>
168
+ inline void BroadcastDivSlow(const ArithmeticParams& params,
169
+ const RuntimeShape& unextended_input1_shape,
170
+ const int8_t* input1_data,
171
+ const RuntimeShape& unextended_input2_shape,
172
+ const int8_t* input2_data,
173
+ const RuntimeShape& unextended_output_shape,
174
+ int8_t* output_data) {
175
+ BroadcastDivSlowQuantized<int8_t, N>(
176
+ params, unextended_input1_shape, input1_data, unextended_input2_shape,
177
+ input2_data, unextended_output_shape, output_data);
178
+ }
179
+
180
+ // TODO(jiawen): We can implement BroadcastDiv on buffers of arbitrary
181
+ // dimensionality if the runtime code does a single loop over one dimension
182
+ // that handles broadcasting as the base case. The code generator would then
183
+ // generate max(D1, D2) nested for loops.
184
+ template <typename T, int N = 5>
185
+ void BroadcastDivSlow(const ArithmeticParams& params,
186
+ const RuntimeShape& unextended_input1_shape,
187
+ const T* input1_data,
188
+ const RuntimeShape& unextended_input2_shape,
189
+ const T* input2_data,
190
+ const RuntimeShape& unextended_output_shape,
191
+ T* output_data) {
192
+ T output_activation_min;
193
+ T output_activation_max;
194
+ GetActivationParams(params, &output_activation_min, &output_activation_max);
195
+
196
+ TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), N);
197
+ TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), N);
198
+ TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), N);
199
+
200
+ NdArrayDesc<N> desc1;
201
+ NdArrayDesc<N> desc2;
202
+ NdArrayDesc<N> output_desc;
203
+ NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
204
+ unextended_input2_shape, &desc1, &desc2);
205
+ CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
206
+ &output_desc);
207
+
208
+ // In Tensorflow, the dimensions are canonically named (batch_number, row,
209
+ // col, channel), with extents (batches, height, width, depth), with the
210
+ // trailing dimension changing most rapidly (channels has the smallest
211
+ // stride, typically 1 element).
212
+ //
213
+ // In generated C code, we store arrays with the dimensions reversed. The
214
+ // first dimension has smallest stride.
215
+
216
+ auto div_func = [&](int indexes[N]) {
217
+ output_data[SubscriptToIndex(output_desc, indexes)] =
218
+ ActivationFunctionWithMinMax(
219
+ input1_data[SubscriptToIndex(desc1, indexes)] /
220
+ input2_data[SubscriptToIndex(desc2, indexes)],
221
+ output_activation_min, output_activation_max);
222
+ };
223
+ NDOpsHelper<N>(output_desc, div_func);
224
+ }
225
+
226
+ template <typename T>
227
+ inline void Div(const ArithmeticParams& params,
228
+ const RuntimeShape& input1_shape, const T* input1_data,
229
+ const RuntimeShape& input2_shape, const T* input2_data,
230
+ const RuntimeShape& output_shape, T* output_data) {
231
+ T output_activation_min;
232
+ T output_activation_max;
233
+ GetActivationParams(params, &output_activation_min, &output_activation_max);
234
+
235
+ const int flat_size =
236
+ MatchingElementsSize(input1_shape, input2_shape, output_shape);
237
+ for (int i = 0; i < flat_size; ++i) {
238
+ output_data[i] = ActivationFunctionWithMinMax(
239
+ input1_data[i] / input2_data[i], output_activation_min,
240
+ output_activation_max);
241
+ }
242
+ }
243
+
244
+ } // namespace reference_ops
245
+ } // namespace tflite_micro
246
+
247
+ #endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DIV_H_
@@ -0,0 +1,37 @@
1
+ /* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+ #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ELU_H_
16
+ #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ELU_H_
17
+
18
+ #include "tensorflow/lite/kernels/internal/cppmath.h"
19
+ #include "tensorflow/lite/kernels/internal/types.h"
20
+
21
+ namespace tflite_micro {
22
+
23
+ namespace reference_ops {
24
+
25
+ inline void Elu(const RuntimeShape& input_shape, const float* input_data,
26
+ const RuntimeShape& output_shape, float* output_data) {
27
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
28
+ for (int i = 0; i < flat_size; ++i) {
29
+ const float val = input_data[i];
30
+ output_data[i] = val < 0.0f ? TfLiteExpm1(val) : val;
31
+ }
32
+ }
33
+
34
+ } // namespace reference_ops
35
+ } // namespace tflite_micro
36
+
37
+ #endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ELU_H_