mirror of
https://github.com/mit-han-lab/tinyengine.git
synced 2025-10-15 13:07:08 +08:00

* sparse training example on openmv cam * minor fix for openmv firmware compilation * python side code * minor * update README * remove fc only and update README * Update README.md * update news * update news * update link
635 lines
24 KiB
Diff
635 lines
24 KiB
Diff
diff --git a/src/hal/cmsis/include/arm_nnsupportfunctions.h b/src/hal/cmsis/include/arm_nnsupportfunctions.h
|
|
index 84601904..abc6fe04 100644
|
|
--- a/src/hal/cmsis/include/arm_nnsupportfunctions.h
|
|
+++ b/src/hal/cmsis/include/arm_nnsupportfunctions.h
|
|
@@ -39,6 +39,9 @@ extern "C"
|
|
{
|
|
#endif
|
|
|
|
+// Function-like min/max helpers. Each argument may be evaluated more than once,
|
|
+// so do not pass expressions with side effects.
|
|
+#define MAX(A,B) ((A) > (B) ? (A) : (B))
|
|
+#define MIN(A,B) ((A) < (B) ? (A) : (B))
|
|
+// CLAMP limits x to [l, h]; note that the upper bound is passed before the lower bound.
|
|
+#define CLAMP(x, h, l) MAX(MIN((x), (h)), (l))
|
|
/**
|
|
* @brief Union for SIMD access of Q31/Q15/Q7 types
|
|
*/
|
|
@@ -179,6 +182,7 @@ void arm_nn_mult_q15(
|
|
* Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
|
|
*/
|
|
|
|
+
|
|
void arm_nn_mult_q7(
|
|
q7_t * pSrcA,
|
|
q7_t * pSrcB,
|
|
@@ -186,6 +190,128 @@ void arm_nn_mult_q7(
|
|
const uint16_t out_shift,
|
|
uint32_t blockSize);
|
|
|
|
+//TinyEngine required
|
|
+// Split a signed shift amount: LEFT_SHIFT yields it when positive (else 0),
|
|
+// RIGHT_SHIFT yields its magnitude when it is zero or negative (else 0).
|
|
+// The argument is parenthesized so compound expressions such as (a - b) expand correctly.
|
|
+#define LEFT_SHIFT(_shift) ((_shift) > 0 ? (_shift) : 0)
|
|
+#define RIGHT_SHIFT(_shift) ((_shift) > 0 ? 0 : -(_shift))
|
|
+#define Q31_MAX ((q31_t)(0x7FFFFFFFL))
|
|
+#define Q31_MIN ((q31_t)(0x80000000L))
|
|
+
|
|
+/**
|
|
+ @brief Write 2 q15 elements from a packed q31 value and post increment pointer.
|
|
+ @param[in,out] pQ15 Pointer to pointer that holds address of destination; advanced by 2 elements.
|
|
+ @param[in] value Packed value. In the element-wise path the low 16 bits go to
|
|
+ element 0 and the high 16 bits to element 1.
|
|
+ */
|
|
+static __INLINE void write_q15x2_ia (
|
|
+ q15_t ** pQ15,
|
|
+ q31_t value)
|
|
+ {
|
|
+ q31_t val = value;
|
|
+ #ifdef __ARM_FEATURE_UNALIGNED
|
|
+ memcpy (*pQ15, &val, 4);
|
|
+ #else
|
|
+ (*pQ15)[0] = (val & 0x0FFFF);
|
|
+ (*pQ15)[1] = (val >> 16) & 0x0FFFF;
|
|
+ #endif
|
|
+
|
|
+ *pQ15 += 2;
|
|
+ }
|
|
+
|
|
+/**
|
|
+ @brief Read 2 q15 elements and post increment pointer.
|
|
+ @param[in] in_q15 Pointer to pointer that holds address of input.
|
|
+ @return q31 value
|
|
+ */
|
|
+__STATIC_FORCEINLINE q31_t arm_nn_read_q15x2_ia(const q15_t **in_q15)
|
|
+{
|
|
+    // Copy the two q15 elements at the current position into one packed word.
|
|
+    const q15_t *src = *in_q15;
|
|
+    q31_t packed;
|
|
+
|
|
+    memcpy(&packed, src, 4);
|
|
+
|
|
+    // Post increment: step the caller's pointer past the two elements just read.
|
|
+    *in_q15 = src + 2;
|
|
+    return packed;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * @brief Saturating doubling high multiply. Result matches
|
|
+ * NEON instruction VQRDMULH.
|
|
+ * @param[in] m1 Multiplicand
|
|
+ * @param[in] m2 Multiplier
|
|
+ * @return Result of multiplication.
|
|
+ *
|
|
+ */
|
|
+__STATIC_FORCEINLINE q31_t arm_nn_sat_doubling_high_mult(const q31_t m1, const q31_t m2)
|
|
+{
|
|
+    q31_t result = 0;
|
|
+    // Rounding offset to add for a right shift of 31
|
|
+    q63_t mult = 1 << 30;
|
|
+
|
|
+    // Operands of opposite sign give a negative product, so the offset is negated.
|
|
+    if ((m1 < 0) ^ (m2 < 0))
|
|
+    {
|
|
+        mult = 1 - mult;
|
|
+    }
|
|
+    // Gets resolved as a SMLAL instruction
|
|
+    mult = mult + (q63_t)m1 * m2;
|
|
+
|
|
+    // Utilize all of the upper 32 bits. This is the doubling step as well.
|
|
+    // The divisor is a signed 64-bit constant so the division stays signed:
|
|
+    // with an unsigned long divisor, a platform whose unsigned long is 64 bits
|
|
+    // wide would convert a negative 'mult' to unsigned before dividing.
|
|
+    result = (q31_t)(mult / (1LL << 31));
|
|
+
|
|
+    // Q31_MIN * Q31_MIN is the one product that does not fit; saturate it.
|
|
+    if ((m1 == m2) && (m1 == (int32_t)Q31_MIN))
|
|
+    {
|
|
+        result = Q31_MAX;
|
|
+    }
|
|
+    return result;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * @brief Rounding divide by power of two.
|
|
+ * @param[in] dividend - Dividend
|
|
+ * @param[in] exponent - Divisor = power(2, exponent)
|
|
+ * Range: [0, 31]
|
|
+ * @return Rounded result of division. Midpoint is rounded away from zero.
|
|
+ *
|
|
+ */
|
|
+__STATIC_FORCEINLINE q31_t arm_nn_divide_by_power_of_two(const q31_t dividend, const q31_t exponent)
|
|
+{
|
|
+    q31_t result = 0;
|
|
+
|
|
+    // Build the mask with unsigned arithmetic: for exponent == 31 a signed
|
|
+    // (1l << 31) overflows where long is 32 bits wide.
|
|
+    const q31_t remainder_mask = (q31_t)((1UL << exponent) - 1UL);
|
|
+    int32_t remainder = remainder_mask & dividend;
|
|
+
|
|
+    // Basic division
|
|
+    result = dividend >> exponent;
|
|
+
|
|
+    // Adjust 'result' for rounding (mid point away from zero)
|
|
+    q31_t threshold = remainder_mask >> 1;
|
|
+    if (result < 0)
|
|
+    {
|
|
+        threshold++;
|
|
+    }
|
|
+    if (remainder > threshold)
|
|
+    {
|
|
+        result++;
|
|
+    }
|
|
+
|
|
+    return result;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * @brief Requantize a value: optional left shift, saturating doubling high
|
|
+ * multiply, then rounding divide by a power of two.
|
|
+ * @param[in] val Value to requantize
|
|
+ * @param[in] multiplier Multiplier passed to arm_nn_sat_doubling_high_mult
|
|
+ * @param[in] shift If positive, val is multiplied by 2^shift before the multiply;
|
|
+ * otherwise -shift is the exponent of the final rounding divide.
|
|
+ * @return Result of arm_nn_divide_by_power_of_two on the multiplied value.
|
|
+ *
|
|
+ */
|
|
+__STATIC_FORCEINLINE q31_t arm_nn_requantize(const q31_t val, const q31_t multiplier, const q31_t shift)
|
|
+{
|
|
+ return arm_nn_divide_by_power_of_two(arm_nn_sat_doubling_high_mult(val * (1 << LEFT_SHIFT(shift)), multiplier),
|
|
+ RIGHT_SHIFT(shift));
|
|
+}
|
|
+
|
|
+/**
|
|
+ @brief Read 4 q7 from q7 pointer and post increment pointer.
|
|
+ @param[in] in_q7 Pointer to pointer that holds address of input.
|
|
+ @return q31 value
|
|
+ */
|
|
+__STATIC_FORCEINLINE q31_t arm_nn_read_q7x4_ia(const q7_t **in_q7)
|
|
+{
|
|
+    // Copy the four q7 elements at the current position into one packed word.
|
|
+    const q7_t *src = *in_q7;
|
|
+    q31_t packed;
|
|
+
|
|
+    memcpy(&packed, src, 4);
|
|
+
|
|
+    // Post increment: step the caller's pointer past the four elements just read.
|
|
+    *in_q7 = src + 4;
|
|
+    return packed;
|
|
+}
|
|
+
|
|
+
|
|
+
|
|
/**
|
|
* @brief defition to adding rouding offset
|
|
*/
|
|
diff --git a/src/omv/Makefile b/src/omv/Makefile
|
|
index 159d07a5..239fa50a 100644
|
|
--- a/src/omv/Makefile
|
|
+++ b/src/omv/Makefile
|
|
@@ -96,6 +96,50 @@ SRCS += $(addprefix imlib/, \
|
|
zbar.c \
|
|
)
|
|
|
|
+SRCS += $(addprefix modules/TinyEngine/, \
|
|
+ codegen/Source/genModel.c \
|
|
+ codegen/Source/depthwise_kernel3x3_stride1_inplace_CHW_fpreq.c \
|
|
+ codegen/Source/depthwise_kernel3x3_stride2_inplace_CHW_fpreq.c \
|
|
+ codegen/Source/depthwise_kernel5x5_stride1_inplace_CHW_fpreq.c \
|
|
+ codegen/Source/depthwise_kernel7x7_stride1_inplace_CHW_fpreq.c \
|
|
+ codegen/Source/depthwise_kernel7x7_stride2_inplace_CHW_fpreq.c \
|
|
+ codegen/Source/depthwise_kernel3x3_stride1_inplace_CHW_fpreq_bitmask.c \
|
|
+ codegen/Source/depthwise_kernel3x3_stride2_inplace_CHW_fpreq_bitmask.c \
|
|
+ codegen/Source/depthwise_kernel5x5_stride1_inplace_CHW_fpreq_bitmask.c \
|
|
+ codegen/Source/depthwise_kernel7x7_stride1_inplace_CHW_fpreq_bitmask.c \
|
|
+ codegen/Source/depthwise_kernel7x7_stride2_inplace_CHW_fpreq_bitmask.c \
|
|
+ src/kernels/fp_requantize_op/add_fpreq.c \
|
|
+ src/kernels/fp_requantize_op/convolve_1x1_s8_ch8_fpreq.c \
|
|
+ src/kernels/fp_requantize_op/convolve_1x1_s8_ch16_fpreq.c \
|
|
+ src/kernels/fp_requantize_op/convolve_1x1_s8_ch24_fpreq.c \
|
|
+ src/kernels/fp_requantize_op/convolve_1x1_s8_ch48_fpreq.c \
|
|
+ src/kernels/fp_requantize_op/convolve_1x1_s8_fpreq.c \
|
|
+ src/kernels/int_forward_op/avgpooling.c \
|
|
+ src/kernels/fp_requantize_op/convolve_s8_kernel3_inputch3_stride2_pad1_fpreq.c \
|
|
+ src/kernels/fp_requantize_op/mat_mul_kernels_fpreq.c \
|
|
+ src/kernels/fp_requantize_op/convolve_1x1_s8_fpreq_mask.c \
|
|
+ src/kernels/fp_requantize_op/convolve_1x1_s8_fpreq_mask_partialCH.c \
|
|
+ src/kernels/fp_backward_op/sum_4D_exclude_fp.c \
|
|
+ src/kernels/fp_backward_op/where_fp.c \
|
|
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel3_stride1_inpad1_outpad0.c \
|
|
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel3_stride2_inpad1_outpad1.c \
|
|
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel5_stride1_inpad2_outpad0.c \
|
|
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel5_stride2_inpad2_outpad1.c \
|
|
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel7_stride1_inpad3_outpad0.c \
|
|
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel7_stride2_inpad3_outpad1.c \
|
|
+ src/kernels/fp_backward_op/tte_exp_fp.c \
|
|
+ src/kernels/fp_backward_op/sub_fp.c \
|
|
+ src/kernels/fp_backward_op/mul_fp.c \
|
|
+ src/kernels/fp_backward_op/pointwise_conv_fp.c \
|
|
+ src/kernels/fp_backward_op/group_pointwise_conv_fp.c \
|
|
+ src/kernels/fp_backward_op/group_conv_fp_kernel4_stride1_pad0.c \
|
|
+ src/kernels/fp_backward_op/group_conv_fp_kernel8_stride1_pad0.c \
|
|
+ src/kernels/fp_backward_op/strided_slice_4Dto4D_fp.c \
|
|
+ src/kernels/fp_backward_op/sum_3D_fp.c \
|
|
+ src/kernels/fp_backward_op/nll_loss_fp.c \
|
|
+ src/kernels/fp_backward_op/log_softmax_fp.c \
|
|
+ )
|
|
+
|
|
SRCS += $(wildcard ports/$(PORT)/*.c)
|
|
|
|
OBJS = $(addprefix $(BUILD)/, $(SRCS:.c=.o))
|
|
diff --git a/src/omv/boards/OPENMV4/imlib_config.h b/src/omv/boards/OPENMV4/imlib_config.h
|
|
index fd395d87..262d527f 100644
|
|
--- a/src/omv/boards/OPENMV4/imlib_config.h
|
|
+++ b/src/omv/boards/OPENMV4/imlib_config.h
|
|
@@ -18,90 +18,90 @@
|
|
#define IMLIB_ENABLE_IMAGE_FILE_IO
|
|
|
|
// Enable LAB LUT
|
|
-#define IMLIB_ENABLE_LAB_LUT
|
|
+// #define IMLIB_ENABLE_LAB_LUT
|
|
|
|
// Enable YUV LUT
|
|
//#define IMLIB_ENABLE_YUV_LUT
|
|
|
|
// Enable mean pooling
|
|
-#define IMLIB_ENABLE_MEAN_POOLING
|
|
+// #define IMLIB_ENABLE_MEAN_POOLING
|
|
|
|
// Enable midpoint pooling
|
|
-#define IMLIB_ENABLE_MIDPOINT_POOLING
|
|
+// #define IMLIB_ENABLE_MIDPOINT_POOLING
|
|
|
|
// Enable binary ops
|
|
-#define IMLIB_ENABLE_BINARY_OPS
|
|
+// #define IMLIB_ENABLE_BINARY_OPS
|
|
|
|
// Enable math ops
|
|
-#define IMLIB_ENABLE_MATH_OPS
|
|
+// #define IMLIB_ENABLE_MATH_OPS
|
|
|
|
// Enable flood_fill()
|
|
-#define IMLIB_ENABLE_FLOOD_FILL
|
|
+// #define IMLIB_ENABLE_FLOOD_FILL
|
|
|
|
// Enable mean()
|
|
-#define IMLIB_ENABLE_MEAN
|
|
+// #define IMLIB_ENABLE_MEAN
|
|
|
|
// Enable median()
|
|
-#define IMLIB_ENABLE_MEDIAN
|
|
+// #define IMLIB_ENABLE_MEDIAN
|
|
|
|
// Enable mode()
|
|
-#define IMLIB_ENABLE_MODE
|
|
+// #define IMLIB_ENABLE_MODE
|
|
|
|
// Enable midpoint()
|
|
-#define IMLIB_ENABLE_MIDPOINT
|
|
+// #define IMLIB_ENABLE_MIDPOINT
|
|
|
|
// Enable morph()
|
|
-#define IMLIB_ENABLE_MORPH
|
|
+// #define IMLIB_ENABLE_MORPH
|
|
|
|
// Enable Gaussian
|
|
-#define IMLIB_ENABLE_GAUSSIAN
|
|
+// #define IMLIB_ENABLE_GAUSSIAN
|
|
|
|
// Enable Laplacian
|
|
-#define IMLIB_ENABLE_LAPLACIAN
|
|
+// #define IMLIB_ENABLE_LAPLACIAN
|
|
|
|
// Enable bilateral()
|
|
-#define IMLIB_ENABLE_BILATERAL
|
|
+// #define IMLIB_ENABLE_BILATERAL
|
|
|
|
// Enable cartoon()
|
|
// #define IMLIB_ENABLE_CARTOON
|
|
|
|
// Enable linpolar()
|
|
-#define IMLIB_ENABLE_LINPOLAR
|
|
+// #define IMLIB_ENABLE_LINPOLAR
|
|
|
|
// Enable logpolar()
|
|
-#define IMLIB_ENABLE_LOGPOLAR
|
|
+// #define IMLIB_ENABLE_LOGPOLAR
|
|
|
|
// Enable lens_corr()
|
|
-#define IMLIB_ENABLE_LENS_CORR
|
|
+// #define IMLIB_ENABLE_LENS_CORR
|
|
|
|
// Enable rotation_corr()
|
|
-#define IMLIB_ENABLE_ROTATION_CORR
|
|
+// #define IMLIB_ENABLE_ROTATION_CORR
|
|
|
|
// Enable phasecorrelate()
|
|
#if defined(IMLIB_ENABLE_ROTATION_CORR)
|
|
-#define IMLIB_ENABLE_FIND_DISPLACEMENT
|
|
+// #define IMLIB_ENABLE_FIND_DISPLACEMENT
|
|
#endif
|
|
|
|
// Enable get_similarity()
|
|
-#define IMLIB_ENABLE_GET_SIMILARITY
|
|
+// #define IMLIB_ENABLE_GET_SIMILARITY
|
|
|
|
// Enable find_lines()
|
|
-#define IMLIB_ENABLE_FIND_LINES
|
|
+// #define IMLIB_ENABLE_FIND_LINES
|
|
|
|
// Enable find_line_segments()
|
|
-#define IMLIB_ENABLE_FIND_LINE_SEGMENTS
|
|
+// #define IMLIB_ENABLE_FIND_LINE_SEGMENTS
|
|
|
|
// Enable find_circles()
|
|
-#define IMLIB_ENABLE_FIND_CIRCLES
|
|
+// #define IMLIB_ENABLE_FIND_CIRCLES
|
|
|
|
// Enable find_rects()
|
|
-#define IMLIB_ENABLE_FIND_RECTS
|
|
+// #define IMLIB_ENABLE_FIND_RECTS
|
|
|
|
// Enable find_qrcodes() (14 KB)
|
|
-#define IMLIB_ENABLE_QRCODES
|
|
+//#define IMLIB_ENABLE_QRCODES
|
|
|
|
// Enable find_apriltags() (64 KB)
|
|
-#define IMLIB_ENABLE_APRILTAGS
|
|
+//#define IMLIB_ENABLE_APRILTAGS
|
|
|
|
// Enable fine find_apriltags() - (8-way connectivity versus 4-way connectivity)
|
|
// #define IMLIB_ENABLE_FINE_APRILTAGS
|
|
@@ -110,10 +110,10 @@
|
|
// #define IMLIB_ENABLE_HIGH_RES_APRILTAGS
|
|
|
|
// Enable find_datamatrices() (26 KB)
|
|
-#define IMLIB_ENABLE_DATAMATRICES
|
|
+//#define IMLIB_ENABLE_DATAMATRICES
|
|
|
|
// Enable find_barcodes() (42 KB)
|
|
-#define IMLIB_ENABLE_BARCODES
|
|
+//#define IMLIB_ENABLE_BARCODES
|
|
|
|
// Enable CMSIS NN
|
|
// #if !defined(CUBEAI)
|
|
@@ -122,26 +122,26 @@
|
|
|
|
// Enable Tensor Flow
|
|
#if !defined(CUBEAI)
|
|
-#define IMLIB_ENABLE_TF
|
|
+//#define IMLIB_ENABLE_TF
|
|
#endif
|
|
|
|
// Enable FAST (20+ KBs).
|
|
// #define IMLIB_ENABLE_FAST
|
|
|
|
// Enable find_template()
|
|
-#define IMLIB_FIND_TEMPLATE
|
|
+// #define IMLIB_FIND_TEMPLATE
|
|
|
|
// Enable find_lbp()
|
|
-#define IMLIB_ENABLE_FIND_LBP
|
|
+// #define IMLIB_ENABLE_FIND_LBP
|
|
|
|
// Enable find_keypoints()
|
|
-#define IMLIB_ENABLE_FIND_KEYPOINTS
|
|
+// #define IMLIB_ENABLE_FIND_KEYPOINTS
|
|
|
|
// Enable load, save and match descriptor
|
|
-#define IMLIB_ENABLE_DESCRIPTOR
|
|
+// #define IMLIB_ENABLE_DESCRIPTOR
|
|
|
|
// Enable find_hog()
|
|
-#define IMLIB_ENABLE_HOG
|
|
+// #define IMLIB_ENABLE_HOG
|
|
|
|
// Enable selective_search()
|
|
// #define IMLIB_ENABLE_SELECTIVE_SEARCH
|
|
diff --git a/src/omv/boards/OPENMV4/omv_boardconfig.h b/src/omv/boards/OPENMV4/omv_boardconfig.h
|
|
index 412de472..f7da2c03 100644
|
|
--- a/src/omv/boards/OPENMV4/omv_boardconfig.h
|
|
+++ b/src/omv/boards/OPENMV4/omv_boardconfig.h
|
|
@@ -150,16 +150,18 @@
|
|
// The maximum available fb_alloc memory = FB_ALLOC_SIZE + FB_SIZE - (w*h*bpp).
|
|
#define OMV_FFS_MEMORY DTCM // Flash filesystem cache memory
|
|
#define OMV_MAIN_MEMORY SRAM1 // data, bss and heap memory
|
|
+#define OMV_MAIN_MEMORY2 SRAM5 // my memory
|
|
#define OMV_STACK_MEMORY ITCM // stack memory
|
|
#define OMV_DMA_MEMORY SRAM2 // DMA buffers memory.
|
|
#define OMV_FB_MEMORY AXI_SRAM // Framebuffer, fb_alloc
|
|
#define OMV_JPEG_MEMORY SRAM3 // JPEG buffer memory.
|
|
#define OMV_VOSPI_MEMORY SRAM4 // VoSPI buffer memory.
|
|
|
|
-#define OMV_FB_SIZE (400K) // FB memory: header + VGA/GS image
|
|
-#define OMV_FB_ALLOC_SIZE (100K) // minimum fb alloc size
|
|
+#define OMV_FB_SIZE (100K) // FB memory: header + VGA/GS image (previously 400K)
|
|
+#define OMV_FB_ALLOC_SIZE (50K) // minimum fb alloc size (previously 100K)
|
|
#define OMV_STACK_SIZE (64K)
|
|
-#define OMV_HEAP_SIZE (236K)
|
|
+#define OMV_HEAP_SIZE (136K)
|
|
+// #define OMV_HEAP_SIZE (236K)
|
|
|
|
#define OMV_LINE_BUF_SIZE (3 * 1024) // Image line buffer round(640 * 2BPP * 2 buffers).
|
|
#define OMV_MSC_BUF_SIZE (2K) // USB MSC bot data
|
|
@@ -175,21 +177,27 @@
|
|
#define OMV_DTCM_LENGTH 128K
|
|
#define OMV_ITCM_ORIGIN 0x00000000
|
|
#define OMV_ITCM_LENGTH 64K
|
|
-#define OMV_SRAM1_ORIGIN 0x30000000
|
|
-#define OMV_SRAM1_LENGTH 248K
|
|
+// #define OMV_SRAM1_ORIGIN 0x30000000
|
|
+// #define OMV_SRAM1_LENGTH 248K
|
|
+#define OMV_SRAM1_ORIGIN 0x24000000
|
|
+#define OMV_SRAM1_LENGTH 512K
|
|
#define OMV_SRAM2_ORIGIN 0x3003E000 // 8KB of SRAM1
|
|
#define OMV_SRAM2_LENGTH 8K
|
|
#define OMV_SRAM3_ORIGIN 0x30040000
|
|
#define OMV_SRAM3_LENGTH 32K
|
|
#define OMV_SRAM4_ORIGIN 0x38000000
|
|
#define OMV_SRAM4_LENGTH 64K
|
|
-#define OMV_AXI_SRAM_ORIGIN 0x24000000
|
|
-#define OMV_AXI_SRAM_LENGTH 512K
|
|
+#define OMV_AXI_SRAM_ORIGIN 0x30000000
|
|
+#define OMV_AXI_SRAM_LENGTH 248K
|
|
+// #define OMV_AXI_SRAM_ORIGIN 0x24000000
|
|
+// #define OMV_AXI_SRAM_LENGTH 512K
|
|
+
|
|
|
|
// Domain 1 DMA buffers region.
|
|
#define OMV_DMA_MEMORY_D1 AXI_SRAM
|
|
#define OMV_DMA_MEMORY_D1_SIZE (8*1024) // Reserved memory for DMA buffers
|
|
-#define OMV_DMA_REGION_D1_BASE (OMV_AXI_SRAM_ORIGIN+(500*1024))
|
|
+#define OMV_DMA_REGION_D1_BASE (OMV_AXI_SRAM_ORIGIN+(400*1024))
|
|
+// #define OMV_DMA_REGION_D1_BASE (OMV_AXI_SRAM_ORIGIN+(500*1024))
|
|
#define OMV_DMA_REGION_D1_SIZE MPU_REGION_SIZE_8KB
|
|
|
|
// Domain 2 DMA buffers region.
|
|
diff --git a/src/omv/modules/examplemodule.c b/src/omv/modules/examplemodule.c
|
|
index 37e2b4f4..52d1bda2 100644
|
|
--- a/src/omv/modules/examplemodule.c
|
|
+++ b/src/omv/modules/examplemodule.c
|
|
@@ -1,17 +1,81 @@
|
|
// Include MicroPython API.
|
|
#include "py/runtime.h"
|
|
+#include "genNN.h"
|
|
+#include "detectionUtility.h"
|
|
+#include <stdio.h>
|
|
+#include "py_image.h"
|
|
|
|
-// This is the function which will be called from Python as cexample.add_ints(a, b).
|
|
-STATIC mp_obj_t example_add_ints(mp_obj_t a_obj, mp_obj_t b_obj) {
|
|
- // Extract the ints from the micropython input objects.
|
|
- int a = mp_obj_get_int(a_obj);
|
|
- int b = mp_obj_get_int(b_obj);
|
|
+#define TEST_SIZE (1 * 1024)
|
|
+#define TN_MAX(A,B) ((A) > (B) ? (A) : (B))
|
|
+#define TN_MIN(A,B) ((A) < (B) ? (A) : (B))
|
|
+
|
|
+// for fc only
|
|
+#define ORIGIN_H 128
|
|
+#define ORIGIN_W 128
|
|
+#define IMAGE_H 128
|
|
+#define IMAGE_W 128
|
|
|
|
- // Calculate the addition and convert to MicroPython object.
|
|
- return mp_obj_new_int(a + b);
|
|
+uint16_t color;
|
|
+float labels[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
|
+// Called from Python as cexample.train_demo(img, command).
|
|
+// a: image object; its pixels are read as RGB565 and copied into the network input buffer.
|
|
+// b: integer command; >= 0 trains on this frame with that class label, negative runs inference.
|
|
+// Returns the MicroPython int 0, or -1 when the label does not fit in labels[]
|
|
+// (nothing is preprocessed or trained in that case).
|
|
+STATIC mp_obj_t example_train_demo_fn(mp_obj_t a, mp_obj_t b) {
|
|
+    image_t* img = py_image_cobj(a);
|
|
+    // >= 0, for training with the label, -1 is for inference
|
|
+    int command = mp_obj_get_int(b);
|
|
+    const int num_labels = (int)(sizeof(labels) / sizeof(labels[0]));
|
|
+    // Reject labels that would be written past the end of labels[].
|
|
+    if (command >= num_labels){
|
|
+        printf("invalid class %d\n", command);
|
|
+        return mp_obj_new_int(-1);
|
|
+    }
|
|
+    // preprocessing
|
|
+    signed char *input = getInput();
|
|
+    int i, j;
|
|
+    for (j = 0; j < IMAGE_H; j++){
|
|
+        for (i = 0; i < IMAGE_W; i++){
|
|
+            // NOTE(review): i runs over IMAGE_W and j over IMAGE_H, yet the index is
|
|
+            // j + IMAGE_W * i (column-major); confirm this is the layout the model expects.
|
|
+            int index = j + IMAGE_W * i;
|
|
+            // Positions outside the source area are zero filled.
|
|
+            if (i >= ORIGIN_W || j >= ORIGIN_H){
|
|
+                input[index * 3] = (int8_t) 0;
|
|
+                input[index * 3 + 1] = (int8_t) 0;
|
|
+                input[index * 3 + 2] = (int8_t) 0;
|
|
+                continue;
|
|
+            }
|
|
+            // NOTE(review): assumes img is RGB565 and at least ORIGIN_W x ORIGIN_H pixels;
|
|
+            // neither is checked here.
|
|
+            uint16_t pixel = IMAGE_GET_RGB565_PIXEL(img, i + MAX((ORIGIN_W-IMAGE_W)/2,0),
|
|
+                                                    j + MAX((ORIGIN_H-IMAGE_H)/2,0));
|
|
+            // Scale the 5/6/5-bit channels up (max 248/252/248), then subtract 128
|
|
+            // so the values fit the signed 8-bit input buffer.
|
|
+            int red = ((pixel & 0xF800) >> 11) * 8;
|
|
+            int green = ((pixel & 0x07E0) >> 5) * 4;
|
|
+            int blue = ((pixel & 0x001F) >> 0) * 8;
|
|
+            input[index * 3] = (int8_t) (red - 128);
|
|
+            input[index * 3 + 1] = (int8_t) (green - 128);
|
|
+            input[index * 3 + 2] = (int8_t) (blue - 128);
|
|
+        }
|
|
+    }
|
|
+    if (command >= 0){
|
|
+        // Build a one-hot target; clear every entry so an earlier label cannot linger.
|
|
+        for (i = 0; i < num_labels; i++){
|
|
+            labels[i] = 0;
|
|
+        }
|
|
+        labels[command] = 1;
|
|
+        invoke(labels);
|
|
+        printf("train class %d\n", command);
|
|
+    }
|
|
+    else{
|
|
+        invoke_inf();
|
|
+        // NOTE(review): the two outputs are compared as unsigned bytes; confirm that
|
|
+        // getOutput() does not hold signed int8 values.
|
|
+        uint8_t* output = (uint8_t*)getOutput();
|
|
+        if(output[0] > output[1]){
|
|
+            printf("infer class 0\n");
|
|
+            color = 63488; // 0xF800: only the bits of the red mask used above
|
|
+        }
|
|
+        else{
|
|
+            printf("infer class 1\n");
|
|
+            color = 2016; // 0x07E0: only the bits of the green mask used above
|
|
+        }
|
|
+        // Draw a 5x5 marker starting at (3, 3) in the colour chosen for the predicted class.
|
|
+        int x_start = 3, y_start = 3;
|
|
+        for (i = 0; i < 5; i++){
|
|
+            for (j = 0; j < 5; j++){
|
|
+                IMAGE_PUT_RGB565_PIXEL(img, i + x_start, j + y_start, color);
|
|
+            }
|
|
+        }
|
|
+    }
|
|
+    return mp_obj_new_int(0);
|
|
}
|
|
+
|
|
// Define a Python reference to the function above.
|
|
-STATIC MP_DEFINE_CONST_FUN_OBJ_2(example_add_ints_obj, example_add_ints);
|
|
+STATIC MP_DEFINE_CONST_FUN_OBJ_2(example_train_demo, example_train_demo_fn);
|
|
|
|
// Define all properties of the module.
|
|
// Table entries are key/value pairs of the attribute name (a string)
|
|
@@ -20,7 +84,7 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_2(example_add_ints_obj, example_add_ints);
|
|
// optimized to word-sized integers by the build system (interned strings).
|
|
STATIC const mp_rom_map_elem_t example_module_globals_table[] = {
|
|
{ MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_cexample) },
|
|
- { MP_ROM_QSTR(MP_QSTR_add_ints), MP_ROM_PTR(&example_add_ints_obj) },
|
|
+ { MP_ROM_QSTR(MP_QSTR_train_demo), MP_ROM_PTR(&example_train_demo) },
|
|
};
|
|
STATIC MP_DEFINE_CONST_DICT(example_module_globals, example_module_globals_table);
|
|
|
|
@@ -33,4 +97,4 @@ const mp_obj_module_t example_user_cmodule = {
|
|
// Register the module to make it available in Python.
|
|
// Note: This module is disabled, set the thrid argument to 1 to enable it, or
|
|
// use a macro like MODULE_CEXAMPLE_ENABLED to conditionally enable this module.
|
|
-MP_REGISTER_MODULE(MP_QSTR_cexample, example_user_cmodule, 0);
|
|
+MP_REGISTER_MODULE(MP_QSTR_cexample, example_user_cmodule, 1);
|
|
diff --git a/src/omv/ports/stm32/omv_portconfig.mk b/src/omv/ports/stm32/omv_portconfig.mk
|
|
index 200ffb7d..b3049e25 100644
|
|
--- a/src/omv/ports/stm32/omv_portconfig.mk
|
|
+++ b/src/omv/ports/stm32/omv_portconfig.mk
|
|
@@ -4,7 +4,7 @@ STARTUP ?= st/startup_$(shell echo $(MCU) | tr '[:upper:]' '[:lower:]')
|
|
LDSCRIPT ?= stm32fxxx
|
|
|
|
# Compiler Flags
|
|
-CFLAGS += -std=gnu99 -Wall -Werror -Warray-bounds -mthumb -nostartfiles -fdata-sections -ffunction-sections
|
|
+CFLAGS += -std=gnu99 -Warray-bounds -mthumb -nostartfiles -fdata-sections -ffunction-sections -lm
|
|
CFLAGS += -fno-inline-small-functions -D$(MCU) -D$(CFLAGS_MCU) -D$(ARM_MATH) -DARM_NN_TRUNCATE\
|
|
-fsingle-precision-constant -Wdouble-promotion -mcpu=$(CPU) -mtune=$(CPU) -mfpu=$(FPU) -mfloat-abi=hard
|
|
CFLAGS += -D__FPU_PRESENT=1 -D__VFP_FP__ -DUSE_USB_FS -DUSE_DEVICE_MODE -DUSE_USB_OTG_ID=0 -DHSE_VALUE=$(OMV_HSE_VALUE)\
|
|
@@ -34,6 +34,10 @@ OMV_CFLAGS += -I$(TOP_DIR)/$(OMV_DIR)/alloc/
|
|
OMV_CFLAGS += -I$(TOP_DIR)/$(OMV_DIR)/common/
|
|
OMV_CFLAGS += -I$(TOP_DIR)/$(OMV_DIR)/imlib/
|
|
OMV_CFLAGS += -I$(TOP_DIR)/$(OMV_DIR)/modules/
|
|
+OMV_CFLAGS += -I$(TOP_DIR)/hal/cmsis/include/
|
|
+OMV_CFLAGS += -I$(TOP_DIR)/$(OMV_DIR)/modules/TinyEngine
|
|
+OMV_CFLAGS += -I$(TOP_DIR)/$(OMV_DIR)/modules/TinyEngine/include
|
|
+OMV_CFLAGS += -I$(TOP_DIR)/$(OMV_DIR)/modules/TinyEngine/codegen/Include
|
|
OMV_CFLAGS += -I$(TOP_DIR)/$(OMV_DIR)/sensors/
|
|
OMV_CFLAGS += -I$(TOP_DIR)/$(OMV_DIR)/ports/$(PORT)/
|
|
OMV_CFLAGS += -I$(TOP_DIR)/$(OMV_DIR)/ports/$(PORT)/modules/
|
|
@@ -213,6 +217,50 @@ FIRM_OBJ += $(addprefix $(BUILD)/$(OMV_DIR)/imlib/, \
|
|
zbar.o \
|
|
)
|
|
|
|
+FIRM_OBJ += $(addprefix $(BUILD)/$(OMV_DIR)/modules/TinyEngine/, \
|
|
+ codegen/Source/genModel.o \
|
|
+ codegen/Source/depthwise_kernel3x3_stride1_inplace_CHW_fpreq.o \
|
|
+ codegen/Source/depthwise_kernel3x3_stride2_inplace_CHW_fpreq.o \
|
|
+ codegen/Source/depthwise_kernel5x5_stride1_inplace_CHW_fpreq.o \
|
|
+ codegen/Source/depthwise_kernel7x7_stride1_inplace_CHW_fpreq.o \
|
|
+ codegen/Source/depthwise_kernel7x7_stride2_inplace_CHW_fpreq.o \
|
|
+ codegen/Source/depthwise_kernel3x3_stride1_inplace_CHW_fpreq_bitmask.o \
|
|
+ codegen/Source/depthwise_kernel3x3_stride2_inplace_CHW_fpreq_bitmask.o \
|
|
+ codegen/Source/depthwise_kernel5x5_stride1_inplace_CHW_fpreq_bitmask.o \
|
|
+ codegen/Source/depthwise_kernel7x7_stride1_inplace_CHW_fpreq_bitmask.o \
|
|
+ codegen/Source/depthwise_kernel7x7_stride2_inplace_CHW_fpreq_bitmask.o \
|
|
+ src/kernels/fp_requantize_op/add_fpreq.o \
|
|
+ src/kernels/fp_requantize_op/convolve_1x1_s8_ch8_fpreq.o \
|
|
+ src/kernels/fp_requantize_op/convolve_1x1_s8_ch16_fpreq.o \
|
|
+ src/kernels/fp_requantize_op/convolve_1x1_s8_ch24_fpreq.o \
|
|
+ src/kernels/fp_requantize_op/convolve_1x1_s8_ch48_fpreq.o \
|
|
+ src/kernels/fp_requantize_op/convolve_1x1_s8_fpreq.o \
|
|
+ src/kernels/int_forward_op/avgpooling.o \
|
|
+ src/kernels/fp_requantize_op/convolve_s8_kernel3_inputch3_stride2_pad1_fpreq.o \
|
|
+ src/kernels/fp_requantize_op/mat_mul_kernels_fpreq.o \
|
|
+ src/kernels/fp_requantize_op/convolve_1x1_s8_fpreq_mask.o \
|
|
+ src/kernels/fp_requantize_op/convolve_1x1_s8_fpreq_mask_partialCH.o \
|
|
+ src/kernels/fp_backward_op/sum_4D_exclude_fp.o \
|
|
+ src/kernels/fp_backward_op/where_fp.o \
|
|
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel3_stride1_inpad1_outpad0.o \
|
|
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel3_stride2_inpad1_outpad1.o \
|
|
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel5_stride1_inpad2_outpad0.o \
|
|
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel5_stride2_inpad2_outpad1.o \
|
|
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel7_stride1_inpad3_outpad0.o \
|
|
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel7_stride2_inpad3_outpad1.o \
|
|
+ src/kernels/fp_backward_op/tte_exp_fp.o \
|
|
+ src/kernels/fp_backward_op/sub_fp.o \
|
|
+ src/kernels/fp_backward_op/mul_fp.o \
|
|
+ src/kernels/fp_backward_op/pointwise_conv_fp.o \
|
|
+ src/kernels/fp_backward_op/group_pointwise_conv_fp.o \
|
|
+ src/kernels/fp_backward_op/group_conv_fp_kernel4_stride1_pad0.o \
|
|
+ src/kernels/fp_backward_op/group_conv_fp_kernel8_stride1_pad0.o \
|
|
+ src/kernels/fp_backward_op/strided_slice_4Dto4D_fp.o \
|
|
+ src/kernels/fp_backward_op/sum_3D_fp.o \
|
|
+ src/kernels/fp_backward_op/nll_loss_fp.o \
|
|
+ src/kernels/fp_backward_op/log_softmax_fp.o \
|
|
+ )
|
|
+
|
|
FIRM_OBJ += $(wildcard $(BUILD)/$(OMV_DIR)/ports/$(PORT)/*.o)
|
|
FIRM_OBJ += $(wildcard $(BUILD)/$(MICROPY_DIR)/modules/*.o)
|
|
FIRM_OBJ += $(wildcard $(BUILD)/$(MICROPY_DIR)/ports/$(PORT)/modules/*.o)
|
|
@@ -625,7 +673,7 @@ endif
|
|
# This target generates the main/app firmware image located at 0x08010000
|
|
$(FIRMWARE): FIRMWARE_OBJS
|
|
$(CPP) -P -E -I$(OMV_BOARD_CONFIG_DIR) $(OMV_DIR)/ports/$(PORT)/$(LDSCRIPT).ld.S > $(BUILD)/$(LDSCRIPT).lds
|
|
- $(CC) $(LDFLAGS) $(FIRM_OBJ) -o $(FW_DIR)/$(FIRMWARE).elf $(LIBS) -lgcc
|
|
+ $(CC) $(LDFLAGS) $(FIRM_OBJ) -o $(FW_DIR)/$(FIRMWARE).elf $(LIBS) -lgcc -lm
|
|
$(OBJCOPY) -Obinary -R .big_const* $(FW_DIR)/$(FIRMWARE).elf $(FW_DIR)/$(FIRMWARE).bin
|
|
$(PYTHON) $(MKDFU) -D $(DFU_DEVICE) -b $(MAIN_APP_ADDR):$(FW_DIR)/$(FIRMWARE).bin $(FW_DIR)/$(FIRMWARE).dfu
|
|
|
|
@@ -633,7 +681,7 @@ ifeq ($(OMV_ENABLE_BL), 1)
|
|
# This target generates the bootloader.
|
|
$(BOOTLOADER): FIRMWARE_OBJS BOOTLOADER_OBJS
|
|
$(CPP) -P -E -I$(OMV_BOARD_CONFIG_DIR) $(BOOTLDR_DIR)/stm32fxxx.ld.S > $(BUILD)/$(BOOTLDR_DIR)/stm32fxxx.lds
|
|
- $(CC) $(BL_LDFLAGS) $(BOOT_OBJ) -o $(FW_DIR)/$(BOOTLOADER).elf -lgcc
|
|
+ $(CC) $(BL_LDFLAGS) $(BOOT_OBJ) -o $(FW_DIR)/$(BOOTLOADER).elf -lgcc -lm
|
|
$(OBJCOPY) -Obinary $(FW_DIR)/$(BOOTLOADER).elf $(FW_DIR)/$(BOOTLOADER).bin
|
|
$(PYTHON) $(MKDFU) -D $(DFU_DEVICE) -b 0x08000000:$(FW_DIR)/$(BOOTLOADER).bin $(FW_DIR)/$(BOOTLOADER).dfu
|
|
endif
|