HG_REPOSITORIES/LPP/INSTRUMENTATION/libuc2 Files · lib/src/lpc17XX/CMSIS/MATH/arm_fir_q7.c

Generic lib clean target bug fixed

jeandet@pc-de-jeandet3.lab-lpp.local - - Load All Authors

File last commit:

r16:194f637a2640 default


                r17:24654bf85fa1

default

Download file

             arm_fir_q7.c
        
                    207 lines
            
             | 6.4 KiB
            
                | text/x-c
            
             |
                CLexer
            
             / lib / src / lpc17XX / CMSIS / MATH / arm_fir_q7.c
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      /* ----------------------------------------------------------------------  

      * Copyright (C) 2010 ARM Limited. All rights reserved.  

      *  

      * $Date:        29. November 2010  

      * $Revision: 	V1.0.3  

      *  

      * Project: 	    CMSIS DSP Library  

      * Title:        arm_fir_q7.c  

      *  

      * Description:  Q7 FIR filter processing function.  

      *  

      * Target Processor: Cortex-M4/Cortex-M3

      *  

      * Version 1.0.3 2010/11/29 

      *    Re-organized the CMSIS folders and updated documentation.  

      *   

      * Version 1.0.2 2010/11/11  

      *    Documentation updated.   

      *  

      * Version 1.0.1 2010/10/05   

      *    Production release and review comments incorporated.  

      *  

      * Version 1.0.0 2010/09/20   

      *    Production release and review comments incorporated.  

      *  

      * Version 0.0.5  2010/04/26   

      * 	 incorporated review comments and updated with latest CMSIS layer  

      *  

      * Version 0.0.3  2010/03/10   

      *    Initial version  

      * -------------------------------------------------------------------- */ 

      #include "arm_math.h" 

      /**  

       * @ingroup groupFilters  

       */ 

      /**  

       * @addtogroup FIR  

       * @{  

       */ 

      /**  

       * @param[in]   *S points to an instance of the Q7 FIR filter structure.  

       * @param[in]   *pSrc points to the block of input data.  

       * @param[out]  *pDst points to the block of output data.  

       * @param[in]   blockSize number of samples to process per call.  

       * @return 	none.  

       *  

       * <b>Scaling and Overflow Behavior:</b>  

       * \par  

       * The function is implemented using a 32-bit internal accumulator.  

       * Both coefficients and state variables are represented in 1.7 format and multiplications yield a 2.14 result.  

       * The 2.14 intermediate results are accumulated in a 32-bit accumulator in 18.14 format.  

       * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.  

       * The accumulator is converted to 18.7 format by discarding the low 7 bits.  

       * Finally, the result is truncated to 1.7 format.  

       */ 

      void arm_fir_q7( 

        const arm_fir_instance_q7 * S, 

        q7_t * pSrc, 

        q7_t * pDst, 

        uint32_t blockSize) 

      { 

        uint32_t numTaps = S->numTaps;                 /* Number of taps in the filter */ 

        uint32_t i, blkCnt;                            /* Loop counters */ 

        q7_t *pState = S->pState;                      /* State pointer */ 

        q7_t *pCoeffs = S->pCoeffs;                    /* Coefficient pointer */ 

        q7_t *px, *pb;                                 /* Temporary pointers to state and coeff */ 

        q31_t acc = 0;                                 /* Accumlator */ 

        q31_t input1, input2;                          /* Temporary variables to store input */ 

        q15_t in1, in2;                                /* Temporary variables to store input */ 

        q7_t *pStateCurnt;                             /* Points to the current sample of the state */ 

        /* S->pState points to state array which contains previous frame (numTaps - 1) samples */ 

        /* pStateCurnt points to the location where the new input data should be written */ 

        pStateCurnt = S->pState + (numTaps - 1u); 

        i = blockSize >> 2u; 

        /* Copy four new input samples into the state buffer.  

         ** Use 32-bit SIMD to move the four 8-bit data.  Only requires one copy for every four samples. */ 

        while(i > 0u) 

        { 

          *__SIMD32(pStateCurnt)++ = *__SIMD32(pSrc)++; 

          i--; 

        } 

        i = blockSize % 0x4u; 

        /* Copy remining samples into the state buffer. */ 

        while(i > 0u) 

        { 

          *pStateCurnt++ = *pSrc++; 

          i--; 

        } 

        blkCnt = blockSize; 

        /* Perform filtering upto BlockSize - BlockSize%4  */ 

        while(blkCnt > 0u) 

        { 

          /* Set accumulator to zero */ 

          acc = 0; 

          /* Initialize state pointer of type q7 */ 

          px = pState; 

          /* Initialize coeff pointer of type q7 */ 

          pb = pCoeffs; 

          i = numTaps >> 2u; 

          /* Loop over the number of taps.  Unroll by a factor of 4.  

           ** Repeat until we've computed numTaps-4 coefficients. */ 

          while(i > 0u) 

          { 

            /* Reading two inputs of state buffer and packing */ 

            in1 = (q15_t) * px++; 

            in2 = (q15_t) * px++; 

            input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 

            /* Reading two inputs of coefficient buffer and packing */ 

            in1 = (q15_t) * pb++; 

            in2 = (q15_t) * pb++; 

            input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 

            /* Perform Multiply and accumlation of 2 packed inputs and coefficients using SMLALD and store the result in accumlator. */ 

            acc = __SMLAD(input1, input2, acc); 

            /* Reading two inputs of state buffer and packing */ 

            in1 = (q15_t) * px++; 

            in2 = (q15_t) * px++; 

            input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 

            /* Reading two inputs of coefficient buffer and packing */ 

            in1 = (q15_t) * pb++; 

            in2 = (q15_t) * pb++; 

            input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 

            /* Perform Multiply and accumlation of 2 packed inputs and coefficients using SMLALD and store the result in accumlator. */ 

            acc = __SMLAD(input1, input2, acc); 

            /* Decrement the tap loop counter */ 

            i--; 

          } 

          i = numTaps % 0x4u; 

          /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 

          while(i > 0u) 

          { 

            acc = __SMLAD(*px++, *pb++, acc); 

            i--; 

          } 

          /* Saturate output */ 

          acc = __SSAT((acc >> 7), 8); 

          /*Store filter output */ 

          *pDst++ = (q7_t) (acc); 

          /* Advance the state pointer by 1 to process the next sample */ 

          pState = pState + 1; 

          /* Decrement the loop counter */ 

          blkCnt--; 

        } 

        /* Processing is complete.  

         ** Now copy the last numTaps - 1 samples to the satrt of the state buffer.  

         ** This prepares the state buffer for the next function call. */ 

        /* Points to the start of the state buffer */ 

        pStateCurnt = S->pState; 

        /* Calculation of count for copying integer writes */ 

        i = (numTaps - 1u) >> 2u; 

        /* Copy four values using integer pointer */ 

        while(i > 0u) 

        { 

          *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 

          i--; 

        } 

        /* Calculation of count for remaining q7_t data */ 

        i = (numTaps - 1u) % 0x4u; 

        /* Copy of remaining q7_t data */ 

        while(i > 0u) 

        { 

          *pStateCurnt++ = *pState++; 

          i--; 

        } 

      } 

      /**  

       * @} end of FIR group  

       */

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				/* ----------------------------------------------------------------------
				* Copyright (C) 2010 ARM Limited. All rights reserved.
				*
				* $Date: 29. November 2010
				* $Revision: V1.0.3
				*
				* Project: CMSIS DSP Library
				* Title: arm_fir_q7.c
				*
				* Description: Q7 FIR filter processing function.
				*
				* Target Processor: Cortex-M4/Cortex-M3
				*
				* Version 1.0.3 2010/11/29
				* Re-organized the CMSIS folders and updated documentation.
				*
				* Version 1.0.2 2010/11/11
				* Documentation updated.
				*
				* Version 1.0.1 2010/10/05
				* Production release and review comments incorporated.
				*
				* Version 1.0.0 2010/09/20
				* Production release and review comments incorporated.
				*
				* Version 0.0.5 2010/04/26
				* incorporated review comments and updated with latest CMSIS layer
				*
				* Version 0.0.3 2010/03/10
				* Initial version
				* -------------------------------------------------------------------- */

				#include "arm_math.h"

				/**
				* @ingroup groupFilters
				*/

				/**
				* @addtogroup FIR
				* @{
				*/

				/**
				* @param[in] *S points to an instance of the Q7 FIR filter structure.
				* @param[in] *pSrc points to the block of input data.
				* @param[out] *pDst points to the block of output data.
				* @param[in] blockSize number of samples to process per call.
				* @return none.
				*
				* <b>Scaling and Overflow Behavior:</b>
				* \par
				* The function is implemented using a 32-bit internal accumulator.
				* Both coefficients and state variables are represented in 1.7 format and multiplications yield a 2.14 result.
				* The 2.14 intermediate results are accumulated in a 32-bit accumulator in 18.14 format.
				* There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
				* The accumulator is converted to 18.7 format by discarding the low 7 bits.
				* Finally, the result is truncated to 1.7 format.
				*/

				void arm_fir_q7(
				const arm_fir_instance_q7 * S,
				q7_t * pSrc,
				q7_t * pDst,
				uint32_t blockSize)
				{
				uint32_t numTaps = S->numTaps; /* Number of taps in the filter */
				uint32_t i, blkCnt; /* Loop counters */
				q7_t pState = S->pState; / State pointer */
				q7_t pCoeffs = S->pCoeffs; / Coefficient pointer */
				q7_t px, pb; /* Temporary pointers to state and coeff */
				q31_t acc = 0; /* Accumlator */
				q31_t input1, input2; /* Temporary variables to store input */
				q15_t in1, in2; /* Temporary variables to store input */
				q7_t pStateCurnt; / Points to the current sample of the state */


				/* S->pState points to state array which contains previous frame (numTaps - 1) samples */
				/* pStateCurnt points to the location where the new input data should be written */
				pStateCurnt = S->pState + (numTaps - 1u);

				i = blockSize >> 2u;

				/* Copy four new input samples into the state buffer.
				** Use 32-bit SIMD to move the four 8-bit data. Only requires one copy for every four samples. */
				while(i > 0u)
				{
				__SIMD32(pStateCurnt)++ = __SIMD32(pSrc)++;
				i--;
				}

				i = blockSize % 0x4u;

				/* Copy remining samples into the state buffer. */
				while(i > 0u)
				{
				pStateCurnt++ = pSrc++;
				i--;
				}

				blkCnt = blockSize;

				/* Perform filtering upto BlockSize - BlockSize%4 */
				while(blkCnt > 0u)
				{
				/* Set accumulator to zero */
				acc = 0;

				/* Initialize state pointer of type q7 */
				px = pState;

				/* Initialize coeff pointer of type q7 */
				pb = pCoeffs;

				i = numTaps >> 2u;

				/* Loop over the number of taps. Unroll by a factor of 4.
				** Repeat until we've computed numTaps-4 coefficients. */
				while(i > 0u)
				{
				/* Reading two inputs of state buffer and packing */
				in1 = (q15_t) * px++;
				in2 = (q15_t) * px++;
				input1 = ((q31_t) in1 & 0x0000FFFF) \| ((q31_t) in2 << 16);

				/* Reading two inputs of coefficient buffer and packing */
				in1 = (q15_t) * pb++;
				in2 = (q15_t) * pb++;
				input2 = ((q31_t) in1 & 0x0000FFFF) \| ((q31_t) in2 << 16);

				/* Perform Multiply and accumlation of 2 packed inputs and coefficients using SMLALD and store the result in accumlator. */
				acc = __SMLAD(input1, input2, acc);

				/* Reading two inputs of state buffer and packing */
				in1 = (q15_t) * px++;
				in2 = (q15_t) * px++;
				input1 = ((q31_t) in1 & 0x0000FFFF) \| ((q31_t) in2 << 16);

				/* Reading two inputs of coefficient buffer and packing */
				in1 = (q15_t) * pb++;
				in2 = (q15_t) * pb++;
				input2 = ((q31_t) in1 & 0x0000FFFF) \| ((q31_t) in2 << 16);

				/* Perform Multiply and accumlation of 2 packed inputs and coefficients using SMLALD and store the result in accumlator. */
				acc = __SMLAD(input1, input2, acc);

				/* Decrement the tap loop counter */
				i--;
				}

				i = numTaps % 0x4u;

				/* If the filter length is not a multiple of 4, compute the remaining filter taps */
				while(i > 0u)
				{
				acc = __SMLAD(px++, pb++, acc);
				i--;

				}

				/* Saturate output */
				acc = __SSAT((acc >> 7), 8);

				/Store filter output /
				*pDst++ = (q7_t) (acc);

				/* Advance the state pointer by 1 to process the next sample */
				pState = pState + 1;

				/* Decrement the loop counter */
				blkCnt--;
				}

				/* Processing is complete.
				** Now copy the last numTaps - 1 samples to the satrt of the state buffer.
				** This prepares the state buffer for the next function call. */

				/* Points to the start of the state buffer */
				pStateCurnt = S->pState;

				/* Calculation of count for copying integer writes */
				i = (numTaps - 1u) >> 2u;

				/* Copy four values using integer pointer */
				while(i > 0u)
				{
				__SIMD32(pStateCurnt)++ = __SIMD32(pState)++;

				i--;

				}

				/* Calculation of count for remaining q7_t data */
				i = (numTaps - 1u) % 0x4u;

				/* Copy of remaining q7_t data */
				while(i > 0u)
				{
				pStateCurnt++ = pState++;
				i--;
				}

				}

				/**
				* @} end of FIR group
				*/