#ifndef __CORE_CM4_SIMD_H
#define __CORE_CM4_SIMD_H
#if defined ( __CC_ARM ) /*------------------ RealView Compiler -----------------*/
#define __SADD8   __sadd8
#define __QADD8   __qadd8
#define __SHADD8  __shadd8
#define __UADD8   __uadd8
#define __UQADD8  __uqadd8
#define __UHADD8  __uhadd8
#define __SSUB8   __ssub8
#define __QSUB8   __qsub8
#define __SHSUB8  __shsub8
#define __USUB8   __usub8
#define __UQSUB8  __uqsub8
#define __UHSUB8  __uhsub8
#define __SADD16  __sadd16
#define __QADD16  __qadd16
#define __SHADD16 __shadd16
#define __UADD16  __uadd16
#define __UQADD16 __uqadd16
#define __UHADD16 __uhadd16
#define __SSUB16  __ssub16
#define __QSUB16  __qsub16
#define __SHSUB16 __shsub16
#define __USUB16  __usub16
#define __UQSUB16 __uqsub16
#define __UHSUB16 __uhsub16
#define __SASX    __sasx
#define __QASX    __qasx
#define __SHASX   __shasx
#define __UASX    __uasx
#define __UQASX   __uqasx
#define __UHASX   __uhasx
#define __SSAX    __ssax
#define __QSAX    __qsax
#define __SHSAX   __shsax
#define __USAX    __usax
#define __UQSAX   __uqsax
#define __UHSAX   __uhsax
#define __USAD8   __usad8
#define __USADA8  __usada8
#define __SSAT16  __ssat16
#define __USAT16  __usat16
#define __UXTB16  __uxtb16
#define __UXTAB16 __uxtab16
#define __SXTB16  __sxtb16
#define __SXTAB16 __sxtab16
#define __SMUAD   __smuad
#define __SMUADX  __smuadx
#define __SMLAD   __smlad
#define __SMLADX  __smladx
#define __SMLALD  __smlald
#define __SMLALDX __smlaldx
#define __SMUSD   __smusd
#define __SMUSDX  __smusdx
#define __SMLSD   __smlsd
#define __SMLSDX  __smlsdx
#define __SMLSLD  __smlsld
#define __SMLSLDX __smlsldx
#define __SEL     __sel
#define __QADD    __qadd
#define __QSUB    __qsub
#define __PKHBT(ARG1,ARG2,ARG3)  ( ((((uint32_t)(ARG1))          ) & 0x0000FFFFUL) |  \
                                   ((((uint32_t)(ARG2)) << (ARG3)) & 0xFFFF0000UL)  )

#define __PKHTB(ARG1,ARG2,ARG3)  ( ((((uint32_t)(ARG1))          ) & 0xFFFF0000UL) |  \
                                   ((((uint32_t)(ARG2)) >> (ARG3)) & 0x0000FFFFUL)  )
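
/* Usage sketch (illustrative values, not part of the original header): __PKHBT keeps
   the bottom halfword of its first argument and inserts the bottom halfword of the
   second, shifted left by the third argument, into the top halfword; __PKHTB is the
   top/bottom counterpart using a right shift.
     uint32_t lo = 0x00001234UL, hi = 0x0000ABCDUL;
     uint32_t packed = __PKHBT(lo, hi, 16);   // packed == 0xABCD1234UL
*/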
#elif defined ( __ICCARM__ ) /*------------------ ICC Compiler -------------------*/

#include <cmsis_iar.h>

#elif defined ( __TMS470__ ) /*---------------- TI CCS Compiler ------------------*/

#include <cmsis_ccs.h>

#elif defined ( __GNUC__ )   /*------------------ GNU Compiler -------------------*/
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SADD8(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("sadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QADD8(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("qadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHADD8(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("shadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UADD8(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("uadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQADD8(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("uqadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHADD8(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("uhadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SSUB8(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("ssub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QSUB8(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("qsub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHSUB8(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("shsub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __USUB8(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("usub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQSUB8(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("uqsub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHSUB8(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("uhsub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SADD16(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("sadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QADD16(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("qadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHADD16(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("shadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UADD16(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("uadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQADD16(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("uqadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHADD16(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("uhadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SSUB16(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("ssub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QSUB16(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("qsub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHSUB16(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("shsub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __USUB16(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("usub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQSUB16(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("uqsub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHSUB16(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("uhsub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SASX(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("sasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QASX(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("qasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHASX(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("shasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UASX(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("uasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQASX(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("uqasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHASX(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("uhasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SSAX(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("ssax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QSAX(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("qsax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHSAX(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("shsax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __USAX(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("usax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQSAX(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("uqsax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHSAX(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("uhsax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __USAD8(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("usad8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __USADA8(uint32_t op1, uint32_t op2, uint32_t op3)
{
  uint32_t result;

  __ASM volatile ("usada8 %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) );
  return(result);
}
#define __SSAT16(ARG1,ARG2) \
({                          \
  uint32_t __RES, __ARG1 = (ARG1); \
  __ASM ("ssat16 %0, %1, %2" : "=r" (__RES) : "I" (ARG2), "r" (__ARG1) ); \
  __RES; \
 })
#define __USAT16(ARG1,ARG2) \
({                          \
  uint32_t __RES, __ARG1 = (ARG1); \
  __ASM ("usat16 %0, %1, %2" : "=r" (__RES) : "I" (ARG2), "r" (__ARG1) ); \
  __RES; \
 })
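
/* Usage sketch (illustrative values, not from the original header): __SSAT16 saturates
   each signed halfword to a signed ARG2-bit range and __USAT16 to an unsigned ARG2-bit
   range; ARG2 must be a compile-time constant because it maps to an immediate field.
     __SSAT16(0x7FFF8000UL, 8)  ==  0x007FFF80UL   // 32767 -> 127, -32768 -> -128
     __USAT16(0x7FFF8000UL, 8)  ==  0x00FF0000UL   // 32767 -> 255, -32768 -> 0
*/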
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UXTB16(uint32_t op1)
{
  uint32_t result;

  __ASM volatile ("uxtb16 %0, %1" : "=r" (result) : "r" (op1));
  return(result);
}
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UXTAB16(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("uxtab16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SXTB16(uint32_t op1)
{
  uint32_t result;

  __ASM volatile ("sxtb16 %0, %1" : "=r" (result) : "r" (op1));
  return(result);
}
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SXTAB16(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("sxtab16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMUAD(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("smuad %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMUADX(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("smuadx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMLAD(uint32_t op1, uint32_t op2, uint32_t op3)
{
  uint32_t result;

  __ASM volatile ("smlad %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMLADX(uint32_t op1, uint32_t op2, uint32_t op3)
{
  uint32_t result;

  __ASM volatile ("smladx %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) );
  return(result);
}
#define __SMLALD(ARG1,ARG2,ARG3) \
({ \
  uint32_t __ARG1 = (ARG1), __ARG2 = (ARG2), __ARG3_H = (uint32_t)((uint64_t)(ARG3) >> 32), __ARG3_L = (uint32_t)((uint64_t)(ARG3) & 0xFFFFFFFFUL); \
  __ASM volatile ("smlald %0, %1, %2, %3" : "=r" (__ARG3_L), "=r" (__ARG3_H) : "r" (__ARG1), "r" (__ARG2), "0" (__ARG3_L), "1" (__ARG3_H) ); \
  (uint64_t)(((uint64_t)__ARG3_H << 32) | __ARG3_L); \
 })
#define __SMLALDX(ARG1,ARG2,ARG3) \
({ \
  uint32_t __ARG1 = (ARG1), __ARG2 = (ARG2), __ARG3_H = (uint32_t)((uint64_t)(ARG3) >> 32), __ARG3_L = (uint32_t)((uint64_t)(ARG3) & 0xFFFFFFFFUL); \
  __ASM volatile ("smlaldx %0, %1, %2, %3" : "=r" (__ARG3_L), "=r" (__ARG3_H) : "r" (__ARG1), "r" (__ARG2), "0" (__ARG3_L), "1" (__ARG3_H) ); \
  (uint64_t)(((uint64_t)__ARG3_H << 32) | __ARG3_L); \
 })
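
/* Usage sketch (illustrative values, not from the original header): the dual multiply
   intrinsics treat each operand as two signed halfwords. __SMLAD adds both products to
   a 32-bit accumulator, __SMLALD to a 64-bit one, and the ...X forms swap the halfwords
   of the second operand before multiplying:
     __SMLAD (0x00030002UL, 0x00050004UL, 10UL)  ==  33UL   // 10 + 2*4 + 3*5
     __SMLADX(0x00030002UL, 0x00050004UL, 10UL)  ==  32UL   // 10 + 2*5 + 3*4
*/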
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMUSD(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("smusd %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMUSDX(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("smusdx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMLSD(uint32_t op1, uint32_t op2, uint32_t op3)
{
  uint32_t result;

  __ASM volatile ("smlsd %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMLSDX(uint32_t op1, uint32_t op2, uint32_t op3)
{
  uint32_t result;

  __ASM volatile ("smlsdx %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) );
  return(result);
}
#define __SMLSLD(ARG1,ARG2,ARG3) \
({ \
  uint32_t __ARG1 = (ARG1), __ARG2 = (ARG2), __ARG3_H = (uint32_t)((uint64_t)(ARG3) >> 32), __ARG3_L = (uint32_t)((uint64_t)(ARG3) & 0xFFFFFFFFUL); \
  __ASM volatile ("smlsld %0, %1, %2, %3" : "=r" (__ARG3_L), "=r" (__ARG3_H) : "r" (__ARG1), "r" (__ARG2), "0" (__ARG3_L), "1" (__ARG3_H) ); \
  (uint64_t)(((uint64_t)__ARG3_H << 32) | __ARG3_L); \
 })
#define __SMLSLDX(ARG1,ARG2,ARG3) \
({ \
  uint32_t __ARG1 = (ARG1), __ARG2 = (ARG2), __ARG3_H = (uint32_t)((uint64_t)(ARG3) >> 32), __ARG3_L = (uint32_t)((uint64_t)(ARG3) & 0xFFFFFFFFUL); \
  __ASM volatile ("smlsldx %0, %1, %2, %3" : "=r" (__ARG3_L), "=r" (__ARG3_H) : "r" (__ARG1), "r" (__ARG2), "0" (__ARG3_L), "1" (__ARG3_H) ); \
  (uint64_t)(((uint64_t)__ARG3_H << 32) | __ARG3_L); \
 })
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SEL(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("sel %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QADD(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("qadd %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QSUB(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("qsub %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}
#define __PKHBT(ARG1,ARG2,ARG3) \
({                          \
  uint32_t __RES, __ARG1 = (ARG1), __ARG2 = (ARG2); \
  __ASM ("pkhbt %0, %1, %2, lsl %3" : "=r" (__RES) : "r" (__ARG1), "r" (__ARG2), "I" (ARG3) ); \
  __RES; \
 })

#define __PKHTB(ARG1,ARG2,ARG3) \
({                          \
  uint32_t __RES, __ARG1 = (ARG1), __ARG2 = (ARG2); \
  if (ARG3 == 0) __ASM ("pkhtb %0, %1, %2" : "=r" (__RES) : "r" (__ARG1), "r" (__ARG2) ); \
  else __ASM ("pkhtb %0, %1, %2, asr %3" : "=r" (__RES) : "r" (__ARG1), "r" (__ARG2), "I" (ARG3) ); \
  __RES; \
 })
#elif defined ( __TASKING__ ) /*------------------ TASKING Compiler --------------------*/