Files
tinyengine/examples/openmv_training_sparse/openmv_sparse_training.patch
Wei-Ming Chen d47d179e49 Training demo on openmv cam (#45)
* sparse training example on openmv cam

* minor fix for openmv firmware compilation

* python side code

* minor

* update README

* remove fc only and update README

* Update README.md

* update news

* update news

* update link
2023-02-07 14:03:45 -08:00

635 lines
24 KiB
Diff

diff --git a/src/hal/cmsis/include/arm_nnsupportfunctions.h b/src/hal/cmsis/include/arm_nnsupportfunctions.h
index 84601904..abc6fe04 100644
--- a/src/hal/cmsis/include/arm_nnsupportfunctions.h
+++ b/src/hal/cmsis/include/arm_nnsupportfunctions.h
@@ -39,6 +39,9 @@ extern "C"
{
#endif
+// Generic min/max/clamp helpers. Each one is guarded so that a definition
+// already supplied by another header is kept instead of being redefined here.
+// Arguments may be evaluated more than once: do not pass expressions with side effects.
+#ifndef MAX
+#define MAX(A,B) ((A) > (B) ? (A) : (B))
+#endif
+#ifndef MIN
+#define MIN(A,B) ((A) < (B) ? (A) : (B))
+#endif
+// CLAMP(x, h, l): limit x to at most h, then to at least l.
+#ifndef CLAMP
+#define CLAMP(x, h, l) MAX(MIN((x), (h)), (l))
+#endif
/**
* @brief Union for SIMD access of Q31/Q15/Q7 types
*/
@@ -179,6 +182,7 @@ void arm_nn_mult_q15(
* Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
*/
+
void arm_nn_mult_q7(
q7_t * pSrcA,
q7_t * pSrcB,
@@ -186,6 +190,128 @@ void arm_nn_mult_q7(
const uint16_t out_shift,
uint32_t blockSize);
+//TinyEngine required
+// Split a signed shift amount into a left-shift part (the value itself when positive,
+// otherwise 0) and a right-shift part (the negated value when not positive, otherwise 0).
+// The argument is parenthesized so that expression arguments expand correctly.
+#define LEFT_SHIFT(_shift) ((_shift) > 0 ? (_shift) : 0)
+#define RIGHT_SHIFT(_shift) ((_shift) > 0 ? 0 : -(_shift))
+#define Q31_MAX ((q31_t)(0x7FFFFFFFL))
+#define Q31_MIN ((q31_t)(0x80000000L))
+
+/**
+ * @brief Write the two 16-bit halves of a q31 value and post increment pointer.
+ * @param[in,out] pQ15   Pointer to pointer that holds address of destination;
+ *                       advanced by two q15 elements on return.
+ * @param[in]     value  Packed value to store. When __ARM_FEATURE_UNALIGNED is
+ *                       defined its 4 bytes are copied as-is; otherwise bits 0..15
+ *                       go to element 0 and bits 16..31 to element 1.
+ */
+static __INLINE void write_q15x2_ia (
+    q15_t ** pQ15,
+    q31_t value)
+{
+    q15_t *dst = *pQ15;
+
+#ifdef __ARM_FEATURE_UNALIGNED
+    memcpy (dst, &value, 4);
+#else
+    dst[0] = (value & 0x0FFFF);
+    dst[1] = (value >> 16) & 0x0FFFF;
+#endif
+
+    *pQ15 = dst + 2;
+}
+
+/**
+ * @brief      Read 2 q15 elements and post increment pointer.
+ * @param[in]  in_q15  Pointer to pointer that holds address of input;
+ *                     advanced by two elements on return.
+ * @return     q31 value holding the 4 bytes that were read
+ */
+__STATIC_FORCEINLINE q31_t arm_nn_read_q15x2_ia(const q15_t **in_q15)
+{
+    const q15_t *src = *in_q15;
+    q31_t packed;
+
+    // memcpy is used for the load, so no q31 pointer cast of the source is needed.
+    memcpy(&packed, src, 4);
+    *in_q15 = src + 2;
+
+    return packed;
+}
+
+/**
+ * @brief      Saturating doubling high multiply. Result matches
+ *             NEON instruction VQRDMULH.
+ * @param[in]  m1  Multiplicand
+ * @param[in]  m2  Multiplier
+ * @return     Result of multiplication: (m1 * m2 + rounding offset) / 2^31,
+ *             saturated to Q31_MAX when both operands equal Q31_MIN.
+ *
+ */
+__STATIC_FORCEINLINE q31_t arm_nn_sat_doubling_high_mult(const q31_t m1, const q31_t m2)
+{
+    q31_t result = 0;
+    // Rounding offset to add for a right shift of 31
+    q63_t mult = 1 << 30;
+
+    // Operands of opposite sign: use the offset 1 - 2^30 instead of 2^30.
+    if ((m1 < 0) ^ (m2 < 0))
+    {
+        mult = 1 - mult;
+    }
+    // Gets resolved as a SMLAL instruction
+    mult = mult + (q63_t)m1 * m2;
+
+    // Utilize all of the upper 32 bits. This is the doubling step
+    // as well. The divisor is a signed 64-bit constant so the division stays
+    // signed on every ABI; with an unsigned long divisor it turns into an
+    // unsigned division wherever unsigned long is 64 bits wide, which is
+    // wrong for negative products.
+    result = (q31_t)(mult / (1LL << 31));
+
+    // Q31_MIN * Q31_MIN is the one case whose doubled result does not fit: saturate.
+    if ((m1 == m2) && (m1 == (int32_t)Q31_MIN))
+    {
+        result = Q31_MAX;
+    }
+    return result;
+}
+
+/**
+ * @brief      Rounding divide by power of two.
+ * @param[in]  dividend  Dividend
+ * @param[in]  exponent  Divisor = power(2, exponent)
+ *                       Range: [0, 31]
+ * @return     Rounded result of division. Midpoint is rounded away from zero.
+ *
+ */
+__STATIC_FORCEINLINE q31_t arm_nn_divide_by_power_of_two(const q31_t dividend, const q31_t exponent)
+{
+    // Mask selecting the low 'exponent' bits that the shift below discards.
+    const q31_t low_bits_mask = (1l << exponent) - 1;
+    const int32_t discarded = low_bits_mask & dividend;
+
+    // Basic division
+    const q31_t quotient = dividend >> exponent;
+
+    // Rounding threshold (mid point away from zero): one higher for negative quotients.
+    const q31_t midpoint = (low_bits_mask >> 1) + ((quotient < 0) ? 1 : 0);
+
+    return (discarded > midpoint) ? (quotient + 1) : quotient;
+}
+
+/**
+ * @brief      Requantize a value with a multiplier and a signed shift.
+ * @param[in]  val         Value to requantize
+ * @param[in]  multiplier  Multiplier passed to arm_nn_sat_doubling_high_mult
+ * @param[in]  shift       Positive: val is first multiplied by 2^shift.
+ *                         Zero or negative: the product is divided by 2^(-shift) with rounding.
+ * @return     Requantized value
+ */
+__STATIC_FORCEINLINE q31_t arm_nn_requantize(const q31_t val, const q31_t multiplier, const q31_t shift)
+{
+    const q31_t pre_scaled = val * (1 << LEFT_SHIFT(shift));
+    const q31_t high_product = arm_nn_sat_doubling_high_mult(pre_scaled, multiplier);
+
+    return arm_nn_divide_by_power_of_two(high_product, RIGHT_SHIFT(shift));
+}
+
+/**
+ * @brief      Read 4 q7 from q7 pointer and post increment pointer.
+ * @param[in]  in_q7  Pointer to pointer that holds address of input;
+ *                    advanced by four elements on return.
+ * @return     q31 value holding the 4 bytes that were read
+ */
+__STATIC_FORCEINLINE q31_t arm_nn_read_q7x4_ia(const q7_t **in_q7)
+{
+    const q7_t *src = *in_q7;
+    q31_t packed;
+
+    memcpy(&packed, src, 4);
+    *in_q7 = src + 4;
+
+    return packed;
+}
+
+
+
/**
* @brief defition to adding rouding offset
*/
diff --git a/src/omv/Makefile b/src/omv/Makefile
index 159d07a5..239fa50a 100644
--- a/src/omv/Makefile
+++ b/src/omv/Makefile
@@ -96,6 +96,50 @@ SRCS += $(addprefix imlib/, \
zbar.c \
)
+# TinyEngine sources compiled into the firmware (files under codegen/Source and src/kernels).
+# Every .c listed here has a matching .o entry in the TinyEngine FIRM_OBJ list in
+# ports/stm32/omv_portconfig.mk; keep the two lists in sync.
+SRCS += $(addprefix modules/TinyEngine/, \
+ codegen/Source/genModel.c \
+ codegen/Source/depthwise_kernel3x3_stride1_inplace_CHW_fpreq.c \
+ codegen/Source/depthwise_kernel3x3_stride2_inplace_CHW_fpreq.c \
+ codegen/Source/depthwise_kernel5x5_stride1_inplace_CHW_fpreq.c \
+ codegen/Source/depthwise_kernel7x7_stride1_inplace_CHW_fpreq.c \
+ codegen/Source/depthwise_kernel7x7_stride2_inplace_CHW_fpreq.c \
+ codegen/Source/depthwise_kernel3x3_stride1_inplace_CHW_fpreq_bitmask.c \
+ codegen/Source/depthwise_kernel3x3_stride2_inplace_CHW_fpreq_bitmask.c \
+ codegen/Source/depthwise_kernel5x5_stride1_inplace_CHW_fpreq_bitmask.c \
+ codegen/Source/depthwise_kernel7x7_stride1_inplace_CHW_fpreq_bitmask.c \
+ codegen/Source/depthwise_kernel7x7_stride2_inplace_CHW_fpreq_bitmask.c \
+ src/kernels/fp_requantize_op/add_fpreq.c \
+ src/kernels/fp_requantize_op/convolve_1x1_s8_ch8_fpreq.c \
+ src/kernels/fp_requantize_op/convolve_1x1_s8_ch16_fpreq.c \
+ src/kernels/fp_requantize_op/convolve_1x1_s8_ch24_fpreq.c \
+ src/kernels/fp_requantize_op/convolve_1x1_s8_ch48_fpreq.c \
+ src/kernels/fp_requantize_op/convolve_1x1_s8_fpreq.c \
+ src/kernels/int_forward_op/avgpooling.c \
+ src/kernels/fp_requantize_op/convolve_s8_kernel3_inputch3_stride2_pad1_fpreq.c \
+ src/kernels/fp_requantize_op/mat_mul_kernels_fpreq.c \
+ src/kernels/fp_requantize_op/convolve_1x1_s8_fpreq_mask.c \
+ src/kernels/fp_requantize_op/convolve_1x1_s8_fpreq_mask_partialCH.c \
+ src/kernels/fp_backward_op/sum_4D_exclude_fp.c \
+ src/kernels/fp_backward_op/where_fp.c \
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel3_stride1_inpad1_outpad0.c \
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel3_stride2_inpad1_outpad1.c \
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel5_stride1_inpad2_outpad0.c \
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel5_stride2_inpad2_outpad1.c \
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel7_stride1_inpad3_outpad0.c \
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel7_stride2_inpad3_outpad1.c \
+ src/kernels/fp_backward_op/tte_exp_fp.c \
+ src/kernels/fp_backward_op/sub_fp.c \
+ src/kernels/fp_backward_op/mul_fp.c \
+ src/kernels/fp_backward_op/pointwise_conv_fp.c \
+ src/kernels/fp_backward_op/group_pointwise_conv_fp.c \
+ src/kernels/fp_backward_op/group_conv_fp_kernel4_stride1_pad0.c \
+ src/kernels/fp_backward_op/group_conv_fp_kernel8_stride1_pad0.c \
+ src/kernels/fp_backward_op/strided_slice_4Dto4D_fp.c \
+ src/kernels/fp_backward_op/sum_3D_fp.c \
+ src/kernels/fp_backward_op/nll_loss_fp.c \
+ src/kernels/fp_backward_op/log_softmax_fp.c \
+ )
+
+
SRCS += $(wildcard ports/$(PORT)/*.c)
OBJS = $(addprefix $(BUILD)/, $(SRCS:.c=.o))
diff --git a/src/omv/boards/OPENMV4/imlib_config.h b/src/omv/boards/OPENMV4/imlib_config.h
index fd395d87..262d527f 100644
--- a/src/omv/boards/OPENMV4/imlib_config.h
+++ b/src/omv/boards/OPENMV4/imlib_config.h
@@ -18,90 +18,90 @@
#define IMLIB_ENABLE_IMAGE_FILE_IO
// Enable LAB LUT
-#define IMLIB_ENABLE_LAB_LUT
+// #define IMLIB_ENABLE_LAB_LUT
// Enable YUV LUT
//#define IMLIB_ENABLE_YUV_LUT
// Enable mean pooling
-#define IMLIB_ENABLE_MEAN_POOLING
+// #define IMLIB_ENABLE_MEAN_POOLING
// Enable midpoint pooling
-#define IMLIB_ENABLE_MIDPOINT_POOLING
+// #define IMLIB_ENABLE_MIDPOINT_POOLING
// Enable binary ops
-#define IMLIB_ENABLE_BINARY_OPS
+// #define IMLIB_ENABLE_BINARY_OPS
// Enable math ops
-#define IMLIB_ENABLE_MATH_OPS
+// #define IMLIB_ENABLE_MATH_OPS
// Enable flood_fill()
-#define IMLIB_ENABLE_FLOOD_FILL
+// #define IMLIB_ENABLE_FLOOD_FILL
// Enable mean()
-#define IMLIB_ENABLE_MEAN
+// #define IMLIB_ENABLE_MEAN
// Enable median()
-#define IMLIB_ENABLE_MEDIAN
+// #define IMLIB_ENABLE_MEDIAN
// Enable mode()
-#define IMLIB_ENABLE_MODE
+// #define IMLIB_ENABLE_MODE
// Enable midpoint()
-#define IMLIB_ENABLE_MIDPOINT
+// #define IMLIB_ENABLE_MIDPOINT
// Enable morph()
-#define IMLIB_ENABLE_MORPH
+// #define IMLIB_ENABLE_MORPH
// Enable Gaussian
-#define IMLIB_ENABLE_GAUSSIAN
+// #define IMLIB_ENABLE_GAUSSIAN
// Enable Laplacian
-#define IMLIB_ENABLE_LAPLACIAN
+// #define IMLIB_ENABLE_LAPLACIAN
// Enable bilateral()
-#define IMLIB_ENABLE_BILATERAL
+// #define IMLIB_ENABLE_BILATERAL
// Enable cartoon()
// #define IMLIB_ENABLE_CARTOON
// Enable linpolar()
-#define IMLIB_ENABLE_LINPOLAR
+// #define IMLIB_ENABLE_LINPOLAR
// Enable logpolar()
-#define IMLIB_ENABLE_LOGPOLAR
+// #define IMLIB_ENABLE_LOGPOLAR
// Enable lens_corr()
-#define IMLIB_ENABLE_LENS_CORR
+// #define IMLIB_ENABLE_LENS_CORR
// Enable rotation_corr()
-#define IMLIB_ENABLE_ROTATION_CORR
+// #define IMLIB_ENABLE_ROTATION_CORR
// Enable phasecorrelate()
#if defined(IMLIB_ENABLE_ROTATION_CORR)
-#define IMLIB_ENABLE_FIND_DISPLACEMENT
+// #define IMLIB_ENABLE_FIND_DISPLACEMENT
#endif
// Enable get_similarity()
-#define IMLIB_ENABLE_GET_SIMILARITY
+// #define IMLIB_ENABLE_GET_SIMILARITY
// Enable find_lines()
-#define IMLIB_ENABLE_FIND_LINES
+// #define IMLIB_ENABLE_FIND_LINES
// Enable find_line_segments()
-#define IMLIB_ENABLE_FIND_LINE_SEGMENTS
+// #define IMLIB_ENABLE_FIND_LINE_SEGMENTS
// Enable find_circles()
-#define IMLIB_ENABLE_FIND_CIRCLES
+// #define IMLIB_ENABLE_FIND_CIRCLES
// Enable find_rects()
-#define IMLIB_ENABLE_FIND_RECTS
+// #define IMLIB_ENABLE_FIND_RECTS
// Enable find_qrcodes() (14 KB)
-#define IMLIB_ENABLE_QRCODES
+//#define IMLIB_ENABLE_QRCODES
// Enable find_apriltags() (64 KB)
-#define IMLIB_ENABLE_APRILTAGS
+//#define IMLIB_ENABLE_APRILTAGS
// Enable fine find_apriltags() - (8-way connectivity versus 4-way connectivity)
// #define IMLIB_ENABLE_FINE_APRILTAGS
@@ -110,10 +110,10 @@
// #define IMLIB_ENABLE_HIGH_RES_APRILTAGS
// Enable find_datamatrices() (26 KB)
-#define IMLIB_ENABLE_DATAMATRICES
+//#define IMLIB_ENABLE_DATAMATRICES
// Enable find_barcodes() (42 KB)
-#define IMLIB_ENABLE_BARCODES
+//#define IMLIB_ENABLE_BARCODES
// Enable CMSIS NN
// #if !defined(CUBEAI)
@@ -122,26 +122,26 @@
// Enable Tensor Flow
#if !defined(CUBEAI)
-#define IMLIB_ENABLE_TF
+//#define IMLIB_ENABLE_TF
#endif
// Enable FAST (20+ KBs).
// #define IMLIB_ENABLE_FAST
// Enable find_template()
-#define IMLIB_FIND_TEMPLATE
+// #define IMLIB_FIND_TEMPLATE
// Enable find_lbp()
-#define IMLIB_ENABLE_FIND_LBP
+// #define IMLIB_ENABLE_FIND_LBP
// Enable find_keypoints()
-#define IMLIB_ENABLE_FIND_KEYPOINTS
+// #define IMLIB_ENABLE_FIND_KEYPOINTS
// Enable load, save and match descriptor
-#define IMLIB_ENABLE_DESCRIPTOR
+// #define IMLIB_ENABLE_DESCRIPTOR
// Enable find_hog()
-#define IMLIB_ENABLE_HOG
+// #define IMLIB_ENABLE_HOG
// Enable selective_search()
// #define IMLIB_ENABLE_SELECTIVE_SEARCH
diff --git a/src/omv/boards/OPENMV4/omv_boardconfig.h b/src/omv/boards/OPENMV4/omv_boardconfig.h
index 412de472..f7da2c03 100644
--- a/src/omv/boards/OPENMV4/omv_boardconfig.h
+++ b/src/omv/boards/OPENMV4/omv_boardconfig.h
@@ -150,16 +150,18 @@
// The maximum available fb_alloc memory = FB_ALLOC_SIZE + FB_SIZE - (w*h*bpp).
#define OMV_FFS_MEMORY DTCM // Flash filesystem cache memory
#define OMV_MAIN_MEMORY SRAM1 // data, bss and heap memory
+#define OMV_MAIN_MEMORY2 SRAM5 // my memory
#define OMV_STACK_MEMORY ITCM // stack memory
#define OMV_DMA_MEMORY SRAM2 // DMA buffers memory.
#define OMV_FB_MEMORY AXI_SRAM // Framebuffer, fb_alloc
#define OMV_JPEG_MEMORY SRAM3 // JPEG buffer memory.
#define OMV_VOSPI_MEMORY SRAM4 // VoSPI buffer memory.
-#define OMV_FB_SIZE (400K) // FB memory: header + VGA/GS image
-#define OMV_FB_ALLOC_SIZE (100K) // minimum fb alloc size
+#define OMV_FB_SIZE (100K) // default: 400 FB memory: header + VGA/GS image
+#define OMV_FB_ALLOC_SIZE (50K) // default: 100 minimum fb alloc size
#define OMV_STACK_SIZE (64K)
-#define OMV_HEAP_SIZE (236K)
+#define OMV_HEAP_SIZE (136K)
+// #define OMV_HEAP_SIZE (236K)
#define OMV_LINE_BUF_SIZE (3 * 1024) // Image line buffer round(640 * 2BPP * 2 buffers).
#define OMV_MSC_BUF_SIZE (2K) // USB MSC bot data
@@ -175,21 +177,27 @@
#define OMV_DTCM_LENGTH 128K
#define OMV_ITCM_ORIGIN 0x00000000
#define OMV_ITCM_LENGTH 64K
-#define OMV_SRAM1_ORIGIN 0x30000000
-#define OMV_SRAM1_LENGTH 248K
+// #define OMV_SRAM1_ORIGIN 0x30000000
+// #define OMV_SRAM1_LENGTH 248K
+#define OMV_SRAM1_ORIGIN 0x24000000
+#define OMV_SRAM1_LENGTH 512K
#define OMV_SRAM2_ORIGIN 0x3003E000 // 8KB of SRAM1
#define OMV_SRAM2_LENGTH 8K
#define OMV_SRAM3_ORIGIN 0x30040000
#define OMV_SRAM3_LENGTH 32K
#define OMV_SRAM4_ORIGIN 0x38000000
#define OMV_SRAM4_LENGTH 64K
-#define OMV_AXI_SRAM_ORIGIN 0x24000000
-#define OMV_AXI_SRAM_LENGTH 512K
+#define OMV_AXI_SRAM_ORIGIN 0x30000000
+#define OMV_AXI_SRAM_LENGTH 248K
+// #define OMV_AXI_SRAM_ORIGIN 0x24000000
+// #define OMV_AXI_SRAM_LENGTH 512K
+
// Domain 1 DMA buffers region.
#define OMV_DMA_MEMORY_D1 AXI_SRAM
#define OMV_DMA_MEMORY_D1_SIZE (8*1024) // Reserved memory for DMA buffers
-#define OMV_DMA_REGION_D1_BASE (OMV_AXI_SRAM_ORIGIN+(500*1024))
+#define OMV_DMA_REGION_D1_BASE (OMV_AXI_SRAM_ORIGIN+(400*1024))
+// #define OMV_DMA_REGION_D1_BASE (OMV_AXI_SRAM_ORIGIN+(500*1024))
#define OMV_DMA_REGION_D1_SIZE MPU_REGION_SIZE_8KB
// Domain 2 DMA buffers region.
diff --git a/src/omv/modules/examplemodule.c b/src/omv/modules/examplemodule.c
index 37e2b4f4..52d1bda2 100644
--- a/src/omv/modules/examplemodule.c
+++ b/src/omv/modules/examplemodule.c
@@ -1,17 +1,81 @@
// Include MicroPython API.
#include "py/runtime.h"
+#include "genNN.h"
+#include "detectionUtility.h"
+#include <stdio.h>
+#include "py_image.h"
-// This is the function which will be called from Python as cexample.add_ints(a, b).
-STATIC mp_obj_t example_add_ints(mp_obj_t a_obj, mp_obj_t b_obj) {
- // Extract the ints from the micropython input objects.
- int a = mp_obj_get_int(a_obj);
- int b = mp_obj_get_int(b_obj);
+// Parenthesized so the value stays 1024 when the macro is used inside a larger expression.
+#define TEST_SIZE (1 * 1024)
+// Local min/max helpers; arguments may be evaluated more than once.
+#define TN_MAX(A,B) ((A) > (B) ? (A) : (B))
+#define TN_MIN(A,B) ((A) < (B) ? (A) : (B))
+
+// for fc only
+// IMAGE_H/IMAGE_W: size of the pixel grid written into the model input buffer.
+// ORIGIN_H/ORIGIN_W: grid positions at or beyond these are zero-filled, and
+// (ORIGIN - IMAGE) / 2 is used as the read offset into the source image when positive.
+#define ORIGIN_H 128
+#define ORIGIN_W 128
+#define IMAGE_H 128
+#define IMAGE_W 128
- // Calculate the addition and convert to MicroPython object.
- return mp_obj_new_int(a + b);
+// RGB565 value of the marker drawn on the image after the most recent inference.
+uint16_t color;
+// One-hot label vector handed to invoke() for a training step.
+float labels[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+// Called from Python as cexample.train_demo(img, command).
+//   a: image object; its pixels are read as RGB565 and copied into the model input.
+//   b: integer command. command >= 0 runs one training step with `command` as the
+//      class label; a negative command runs inference and draws a 5x5 marker on img.
+// Returns MicroPython int 0 on success, or -1 when the label does not fit in labels[].
+STATIC mp_obj_t example_train_demo_fn(mp_obj_t a, mp_obj_t b) {
+    image_t *img = py_image_cobj(a);
+    // >= 0, for training with the label, negative is for inference
+    int command = mp_obj_get_int(b);
+    const int num_labels = (int) (sizeof(labels) / sizeof(labels[0]));
+
+    // A label past the end of labels[] would be an out-of-bounds write: reject it.
+    if (command >= num_labels) {
+        printf("train_demo: label %d out of range (max %d)\n", command, num_labels - 1);
+        return mp_obj_new_int(-1);
+    }
+
+    // preprocessing: convert RGB565 pixels to three signed bytes per pixel
+    // NOTE(review): img is assumed to be RGB565 and at least as large as the region read
+    // here; neither is checked - confirm against the Python caller.
+    signed char *input = getInput();
+    int i, j;
+    for (j = 0; j < IMAGE_H; j++) {
+        for (i = 0; i < IMAGE_W; i++) {
+            // NOTE(review): i is passed as x and j as y to IMAGE_GET_RGB565_PIXEL, so this
+            // index stores the pixels transposed relative to row-major order. Training and
+            // inference share this path; confirm it is intended before changing it.
+            int index = j + IMAGE_W * i;
+            if (i >= ORIGIN_W || j >= ORIGIN_H) {
+                // Outside the source region: zero padding.
+                input[index * 3] = (int8_t) 0;
+                input[index * 3 + 1] = (int8_t) 0;
+                input[index * 3 + 2] = (int8_t) 0;
+                continue;
+            }
+            uint16_t pixel = IMAGE_GET_RGB565_PIXEL(img, i + MAX((ORIGIN_W-IMAGE_W)/2,0),
+                                                    j + MAX((ORIGIN_H-IMAGE_H)/2,0));
+            // Expand the 5/6/5-bit channels to a 0..255 scale, then shift to signed bytes.
+            int red = ((pixel & 0xF800) >> 11) * 8;
+            int green = ((pixel & 0x07E0) >> 5) * 4;
+            int blue = ((pixel & 0x001F) >> 0) * 8;
+            input[index * 3] = (int8_t) (red - 128);
+            input[index * 3 + 1] = (int8_t) (green - 128);
+            input[index * 3 + 2] = (int8_t) (blue - 128);
+        }
+    }
+
+    if (command >= 0) {
+        // One-hot encode the label. Every slot is cleared so that a label set by an
+        // earlier call cannot linger in the vector.
+        for (int k = 0; k < num_labels; k++) {
+            labels[k] = 0;
+        }
+        labels[command] = 1;
+        invoke(labels);
+        printf("train class %d\n", command);
+    }
+    else {
+        invoke_inf();
+        // NOTE(review): the two scores are compared as unsigned bytes; confirm that
+        // getOutput() does not hold signed values.
+        uint8_t *output = (uint8_t *) getOutput();
+        if (output[0] > output[1]) {
+            printf("infer class 0\n");
+            color = 63488; // 0xF800, the red-channel mask used above
+        }
+        else {
+            printf("infer class 1\n");
+            color = 2016; // 0x07E0, the green-channel mask used above
+        }
+        // Draw a 5x5 marker in the result colour starting at pixel (3, 3).
+        int x_start = 3, y_start = 3;
+        for (i = 0; i < 5; i++) {
+            for (j = 0; j < 5; j++) {
+                IMAGE_PUT_RGB565_PIXEL(img, i + x_start, j + y_start, color);
+            }
+        }
+    }
+    return mp_obj_new_int(0);
+}
+
// Define a Python reference to the function above.
-STATIC MP_DEFINE_CONST_FUN_OBJ_2(example_add_ints_obj, example_add_ints);
+STATIC MP_DEFINE_CONST_FUN_OBJ_2(example_train_demo, example_train_demo_fn);
// Define all properties of the module.
// Table entries are key/value pairs of the attribute name (a string)
@@ -20,7 +84,7 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_2(example_add_ints_obj, example_add_ints);
// optimized to word-sized integers by the build system (interned strings).
STATIC const mp_rom_map_elem_t example_module_globals_table[] = {
{ MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_cexample) },
- { MP_ROM_QSTR(MP_QSTR_add_ints), MP_ROM_PTR(&example_add_ints_obj) },
+ { MP_ROM_QSTR(MP_QSTR_train_demo), MP_ROM_PTR(&example_train_demo) },
};
STATIC MP_DEFINE_CONST_DICT(example_module_globals, example_module_globals_table);
@@ -33,4 +97,4 @@ const mp_obj_module_t example_user_cmodule = {
// Register the module to make it available in Python.
// Note: This module is disabled, set the thrid argument to 1 to enable it, or
// use a macro like MODULE_CEXAMPLE_ENABLED to conditionally enable this module.
-MP_REGISTER_MODULE(MP_QSTR_cexample, example_user_cmodule, 0);
+MP_REGISTER_MODULE(MP_QSTR_cexample, example_user_cmodule, 1);
diff --git a/src/omv/ports/stm32/omv_portconfig.mk b/src/omv/ports/stm32/omv_portconfig.mk
index 200ffb7d..b3049e25 100644
--- a/src/omv/ports/stm32/omv_portconfig.mk
+++ b/src/omv/ports/stm32/omv_portconfig.mk
@@ -4,7 +4,7 @@ STARTUP ?= st/startup_$(shell echo $(MCU) | tr '[:upper:]' '[:lower:]')
LDSCRIPT ?= stm32fxxx
# Compiler Flags
-CFLAGS += -std=gnu99 -Wall -Werror -Warray-bounds -mthumb -nostartfiles -fdata-sections -ffunction-sections
+CFLAGS += -std=gnu99 -Warray-bounds -mthumb -nostartfiles -fdata-sections -ffunction-sections -lm
CFLAGS += -fno-inline-small-functions -D$(MCU) -D$(CFLAGS_MCU) -D$(ARM_MATH) -DARM_NN_TRUNCATE\
-fsingle-precision-constant -Wdouble-promotion -mcpu=$(CPU) -mtune=$(CPU) -mfpu=$(FPU) -mfloat-abi=hard
CFLAGS += -D__FPU_PRESENT=1 -D__VFP_FP__ -DUSE_USB_FS -DUSE_DEVICE_MODE -DUSE_USB_OTG_ID=0 -DHSE_VALUE=$(OMV_HSE_VALUE)\
@@ -34,6 +34,10 @@ OMV_CFLAGS += -I$(TOP_DIR)/$(OMV_DIR)/alloc/
OMV_CFLAGS += -I$(TOP_DIR)/$(OMV_DIR)/common/
OMV_CFLAGS += -I$(TOP_DIR)/$(OMV_DIR)/imlib/
OMV_CFLAGS += -I$(TOP_DIR)/$(OMV_DIR)/modules/
+OMV_CFLAGS += -I$(TOP_DIR)/hal/cmsis/include/
+OMV_CFLAGS += -I$(TOP_DIR)/$(OMV_DIR)/modules/TinyEngine
+OMV_CFLAGS += -I$(TOP_DIR)/$(OMV_DIR)/modules/TinyEngine/include
+OMV_CFLAGS += -I$(TOP_DIR)/$(OMV_DIR)/modules/TinyEngine/codegen/Include
OMV_CFLAGS += -I$(TOP_DIR)/$(OMV_DIR)/sensors/
OMV_CFLAGS += -I$(TOP_DIR)/$(OMV_DIR)/ports/$(PORT)/
OMV_CFLAGS += -I$(TOP_DIR)/$(OMV_DIR)/ports/$(PORT)/modules/
@@ -213,6 +217,50 @@ FIRM_OBJ += $(addprefix $(BUILD)/$(OMV_DIR)/imlib/, \
zbar.o \
)
+# TinyEngine objects linked into the firmware image. Every .o listed here corresponds to a
+# .c entry in the TinyEngine SRCS list in src/omv/Makefile; keep the two lists in sync.
+FIRM_OBJ += $(addprefix $(BUILD)/$(OMV_DIR)/modules/TinyEngine/, \
+ codegen/Source/genModel.o \
+ codegen/Source/depthwise_kernel3x3_stride1_inplace_CHW_fpreq.o \
+ codegen/Source/depthwise_kernel3x3_stride2_inplace_CHW_fpreq.o \
+ codegen/Source/depthwise_kernel5x5_stride1_inplace_CHW_fpreq.o \
+ codegen/Source/depthwise_kernel7x7_stride1_inplace_CHW_fpreq.o \
+ codegen/Source/depthwise_kernel7x7_stride2_inplace_CHW_fpreq.o \
+ codegen/Source/depthwise_kernel3x3_stride1_inplace_CHW_fpreq_bitmask.o \
+ codegen/Source/depthwise_kernel3x3_stride2_inplace_CHW_fpreq_bitmask.o \
+ codegen/Source/depthwise_kernel5x5_stride1_inplace_CHW_fpreq_bitmask.o \
+ codegen/Source/depthwise_kernel7x7_stride1_inplace_CHW_fpreq_bitmask.o \
+ codegen/Source/depthwise_kernel7x7_stride2_inplace_CHW_fpreq_bitmask.o \
+ src/kernels/fp_requantize_op/add_fpreq.o \
+ src/kernels/fp_requantize_op/convolve_1x1_s8_ch8_fpreq.o \
+ src/kernels/fp_requantize_op/convolve_1x1_s8_ch16_fpreq.o \
+ src/kernels/fp_requantize_op/convolve_1x1_s8_ch24_fpreq.o \
+ src/kernels/fp_requantize_op/convolve_1x1_s8_ch48_fpreq.o \
+ src/kernels/fp_requantize_op/convolve_1x1_s8_fpreq.o \
+ src/kernels/int_forward_op/avgpooling.o \
+ src/kernels/fp_requantize_op/convolve_s8_kernel3_inputch3_stride2_pad1_fpreq.o \
+ src/kernels/fp_requantize_op/mat_mul_kernels_fpreq.o \
+ src/kernels/fp_requantize_op/convolve_1x1_s8_fpreq_mask.o \
+ src/kernels/fp_requantize_op/convolve_1x1_s8_fpreq_mask_partialCH.o \
+ src/kernels/fp_backward_op/sum_4D_exclude_fp.o \
+ src/kernels/fp_backward_op/where_fp.o \
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel3_stride1_inpad1_outpad0.o \
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel3_stride2_inpad1_outpad1.o \
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel5_stride1_inpad2_outpad0.o \
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel5_stride2_inpad2_outpad1.o \
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel7_stride1_inpad3_outpad0.o \
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel7_stride2_inpad3_outpad1.o \
+ src/kernels/fp_backward_op/tte_exp_fp.o \
+ src/kernels/fp_backward_op/sub_fp.o \
+ src/kernels/fp_backward_op/mul_fp.o \
+ src/kernels/fp_backward_op/pointwise_conv_fp.o \
+ src/kernels/fp_backward_op/group_pointwise_conv_fp.o \
+ src/kernels/fp_backward_op/group_conv_fp_kernel4_stride1_pad0.o \
+ src/kernels/fp_backward_op/group_conv_fp_kernel8_stride1_pad0.o \
+ src/kernels/fp_backward_op/strided_slice_4Dto4D_fp.o \
+ src/kernels/fp_backward_op/sum_3D_fp.o \
+ src/kernels/fp_backward_op/nll_loss_fp.o \
+ src/kernels/fp_backward_op/log_softmax_fp.o \
+ )
+
+
FIRM_OBJ += $(wildcard $(BUILD)/$(OMV_DIR)/ports/$(PORT)/*.o)
FIRM_OBJ += $(wildcard $(BUILD)/$(MICROPY_DIR)/modules/*.o)
FIRM_OBJ += $(wildcard $(BUILD)/$(MICROPY_DIR)/ports/$(PORT)/modules/*.o)
@@ -625,7 +673,7 @@ endif
# This target generates the main/app firmware image located at 0x08010000
$(FIRMWARE): FIRMWARE_OBJS
$(CPP) -P -E -I$(OMV_BOARD_CONFIG_DIR) $(OMV_DIR)/ports/$(PORT)/$(LDSCRIPT).ld.S > $(BUILD)/$(LDSCRIPT).lds
- $(CC) $(LDFLAGS) $(FIRM_OBJ) -o $(FW_DIR)/$(FIRMWARE).elf $(LIBS) -lgcc
+ $(CC) $(LDFLAGS) $(FIRM_OBJ) -o $(FW_DIR)/$(FIRMWARE).elf $(LIBS) -lgcc -lm
$(OBJCOPY) -Obinary -R .big_const* $(FW_DIR)/$(FIRMWARE).elf $(FW_DIR)/$(FIRMWARE).bin
$(PYTHON) $(MKDFU) -D $(DFU_DEVICE) -b $(MAIN_APP_ADDR):$(FW_DIR)/$(FIRMWARE).bin $(FW_DIR)/$(FIRMWARE).dfu
@@ -633,7 +681,7 @@ ifeq ($(OMV_ENABLE_BL), 1)
# This target generates the bootloader.
$(BOOTLOADER): FIRMWARE_OBJS BOOTLOADER_OBJS
$(CPP) -P -E -I$(OMV_BOARD_CONFIG_DIR) $(BOOTLDR_DIR)/stm32fxxx.ld.S > $(BUILD)/$(BOOTLDR_DIR)/stm32fxxx.lds
- $(CC) $(BL_LDFLAGS) $(BOOT_OBJ) -o $(FW_DIR)/$(BOOTLOADER).elf -lgcc
+ $(CC) $(BL_LDFLAGS) $(BOOT_OBJ) -o $(FW_DIR)/$(BOOTLOADER).elf -lgcc -lm
$(OBJCOPY) -Obinary $(FW_DIR)/$(BOOTLOADER).elf $(FW_DIR)/$(BOOTLOADER).bin
$(PYTHON) $(MKDFU) -D $(DFU_DEVICE) -b 0x08000000:$(FW_DIR)/$(BOOTLOADER).bin $(FW_DIR)/$(BOOTLOADER).dfu
endif