00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 15. July 2011 00005 * $Revision: V1.0.10 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_cmplx_mult_cmplx_q31.c 00009 * 00010 * Description: Q31 complex-by-complex multiplication 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00013 * 00014 * Version 1.0.10 2011/7/15 00015 * Big Endian support added and Merged M0 and M3/M4 Source code. 00016 * 00017 * Version 1.0.3 2010/11/29 00018 * Re-organized the CMSIS folders and updated documentation. 00019 * 00020 * Version 1.0.2 2010/11/11 00021 * Documentation updated. 00022 * 00023 * Version 1.0.1 2010/10/05 00024 * Production release and review comments incorporated. 00025 * 00026 * Version 1.0.0 2010/09/20 00027 * Production release and review comments incorporated. 00028 * -------------------------------------------------------------------- */ 00029 00030 #include "arm_math.h" 00031 00056 void arm_cmplx_mult_cmplx_q31( 00057 q31_t * pSrcA, 00058 q31_t * pSrcB, 00059 q31_t * pDst, 00060 uint32_t numSamples) 00061 { 00062 q31_t a, b, c, d; /* Temporary variables to store real and imaginary values */ 00063 uint32_t blkCnt; /* loop counters */ 00064 00065 #ifndef ARM_MATH_CM0 00066 00067 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00068 00069 /* loop Unrolling */ 00070 blkCnt = numSamples >> 2u; 00071 00072 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00073 ** a second loop below computes the remaining 1 to 3 samples. */ 00074 while(blkCnt > 0u) 00075 { 00076 /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */ 00077 /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */ 00078 a = *pSrcA++; 00079 b = *pSrcA++; 00080 c = *pSrcB++; 00081 d = *pSrcB++; 00082 00083 /* store the real result in 3.29 format in the destination buffer. */ 00084 *pDst++ = (q31_t) ((((q63_t) a * c) >> 33) - (((q63_t) b * d) >> 33)); 00085 /* store the imag result in 3.29 format in the destination buffer. */ 00086 *pDst++ = (q31_t) ((((q63_t) a * d) >> 33) + (((q63_t) b * c) >> 33)); 00087 00088 a = *pSrcA++; 00089 b = *pSrcA++; 00090 c = *pSrcB++; 00091 d = *pSrcB++; 00092 00093 /* store the result in 3.29 format in the destination buffer. */ 00094 *pDst++ = (q31_t) ((((q63_t) a * c) >> 33) - (((q63_t) b * d) >> 33)); 00095 /* store the result in 3.29 format in the destination buffer. */ 00096 *pDst++ = (q31_t) ((((q63_t) a * d) >> 33) + (((q63_t) b * c) >> 33)); 00097 00098 a = *pSrcA++; 00099 b = *pSrcA++; 00100 c = *pSrcB++; 00101 d = *pSrcB++; 00102 00103 /* store the result in 3.29 format in the destination buffer. */ 00104 *pDst++ = (q31_t) ((((q63_t) a * c) >> 33) - (((q63_t) b * d) >> 33)); 00105 /* store the result in 3.29 format in the destination buffer. */ 00106 *pDst++ = (q31_t) ((((q63_t) a * d) >> 33) + (((q63_t) b * c) >> 33)); 00107 00108 a = *pSrcA++; 00109 b = *pSrcA++; 00110 c = *pSrcB++; 00111 d = *pSrcB++; 00112 00113 /* store the result in 3.29 format in the destination buffer. */ 00114 *pDst++ = (q31_t) ((((q63_t) a * c) >> 33) - (((q63_t) b * d) >> 33)); 00115 /* store the result in 3.29 format in the destination buffer. */ 00116 *pDst++ = (q31_t) ((((q63_t) a * d) >> 33) + (((q63_t) b * c) >> 33)); 00117 00118 /* Decrement the blockSize loop counter */ 00119 blkCnt--; 00120 } 00121 00122 /* If the blockSize is not a multiple of 4, compute any remaining output samples here. 00123 ** No loop unrolling is used. */ 00124 blkCnt = numSamples % 0x4u; 00125 00126 while(blkCnt > 0u) 00127 { 00128 /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */ 00129 /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */ 00130 a = *pSrcA++; 00131 b = *pSrcA++; 00132 c = *pSrcB++; 00133 d = *pSrcB++; 00134 00135 /* store the result in 3.29 format in the destination buffer. */ 00136 *pDst++ = (q31_t) ((((q63_t) a * c) >> 33) - (((q63_t) b * d) >> 33)); 00137 /* store the result in 3.29 format in the destination buffer. */ 00138 *pDst++ = (q31_t) ((((q63_t) a * d) >> 33) + (((q63_t) b * c) >> 33)); 00139 00140 /* Decrement the blockSize loop counter */ 00141 blkCnt--; 00142 } 00143 00144 #else 00145 00146 /* Run the below code for Cortex-M0 */ 00147 00148 /* loop Unrolling */ 00149 blkCnt = numSamples >> 1u; 00150 00151 /* First part of the processing with loop unrolling. Compute 2 outputs at a time. 00152 ** a second loop below computes the remaining 1 sample. */ 00153 while(blkCnt > 0u) 00154 { 00155 /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */ 00156 /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */ 00157 a = *pSrcA++; 00158 b = *pSrcA++; 00159 c = *pSrcB++; 00160 d = *pSrcB++; 00161 00162 /* store the real result in 3.29 format in the destination buffer. */ 00163 *pDst++ = (q31_t) ((((q63_t) a * c) >> 33) - (((q63_t) b * d) >> 33)); 00164 /* store the imag result in 3.29 format in the destination buffer. */ 00165 *pDst++ = (q31_t) ((((q63_t) a * d) >> 33) + (((q63_t) b * c) >> 33)); 00166 00167 a = *pSrcA++; 00168 b = *pSrcA++; 00169 c = *pSrcB++; 00170 d = *pSrcB++; 00171 00172 /* store the result in 3.29 format in the destination buffer. */ 00173 *pDst++ = (q31_t) ((((q63_t) a * c) >> 33) - (((q63_t) b * d) >> 33)); 00174 /* store the result in 3.29 format in the destination buffer. */ 00175 *pDst++ = (q31_t) ((((q63_t) a * d) >> 33) + (((q63_t) b * c) >> 33)); 00176 00177 /* Decrement the blockSize loop counter */ 00178 blkCnt--; 00179 } 00180 00181 /* If the blockSize is not a multiple of 2, compute any remaining output samples here. 00182 ** No loop unrolling is used. */ 00183 blkCnt = numSamples % 0x2u; 00184 00185 while(blkCnt > 0u) 00186 { 00187 /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */ 00188 /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */ 00189 a = *pSrcA++; 00190 b = *pSrcA++; 00191 c = *pSrcB++; 00192 d = *pSrcB++; 00193 00194 /* store the result in 3.29 format in the destination buffer. */ 00195 *pDst++ = (q31_t) ((((q63_t) a * c) >> 33) - (((q63_t) b * d) >> 33)); 00196 /* store the result in 3.29 format in the destination buffer. */ 00197 *pDst++ = (q31_t) ((((q63_t) a * d) >> 33) + (((q63_t) b * c) >> 33)); 00198 00199 /* Decrement the blockSize loop counter */ 00200 blkCnt--; 00201 } 00202 00203 #endif /* #ifndef ARM_MATH_CM0 */ 00204 00205 } 00206