/*
    *MIPS32 instructions assembler code
*/
    
    
/* Global functions declarations */
.global fir1_5842
.global fir2_5842
.global fir3_5842

    
    

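/*
    fir1_5842 - FIR x2 half-band upsampler (1st stage of SM5842)
	with 12 dB attenuation (headroom against saturation)
     
    external declaration:     extern void fir1_5842(void *pSrc, void *pDst);

    input:
    pSrc - int32 source data pointer ($a0)

    output:
    pDst - int32 result destination data pointer ($a1)

    The source data array must contain dual-channel (interleaved Left/Right)
    int32 data samples. The code reads a window of 84 dual-channel samples
    (byte offsets 0..668 from pSrc).
    The function reads 1 dual-channel sample, upsamples it and returns
    2 dual-channel samples with 12 dB attenuation (the interpolated output is
    extracted with gain 1/4 and the pass-through sample is divided by 4).
*/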

/*
    FIR1_TAPS8 - eight symmetric taps for one channel of fir1_5842.

    Loads four samples starting at byte offset "head" and four starting at the
    mirrored byte offset "tail" (stride 8 = one dual-channel frame), then
    multiply-accumulates them into accumulator "acc" with the coefficients
    held in $v0, $v1, $a2, $a3 (outermost to innermost, mirrored for the tail).
    "op" is the instruction used for the first product: mult to start a fresh
    accumulation, madd to continue one.
    Clobbers $t0-$t7.
*/
.macro FIR1_TAPS8 op, acc, head, tail
    lw          $t0, \head($a0)
    lw          $t1, \head+8($a0)
    lw          $t2, \head+16($a0)
    lw          $t3, \head+24($a0)
    lw          $t4, \tail($a0)
    lw          $t5, \tail+8($a0)
    lw          $t6, \tail+16($a0)
    lw          $t7, \tail+24($a0)
    \op         \acc, $t0, $v0
    madd        \acc, $t1, $v1
    madd        \acc, $t2, $a2
    madd        \acc, $t3, $a3
    madd        \acc, $t4, $a3
    madd        \acc, $t5, $a2
    madd        \acc, $t6, $v1
    madd        \acc, $t7, $v0
.endm

fir1_5842:
    // 84-tap symmetric kernel applied to both channels of the window at $a0.
    // $ac0 accumulates Left (byte offsets 0, 8, ...), $ac1 accumulates Right
    // (byte offsets 4, 12, ...). Taps are processed from the outside of the
    // window inwards, four coefficients (eight taps) at a time.

    // ---- coefficient group 1: mult starts both accumulators ----
    li          $v0, -12
    li          $v1, 26
    li          $a2, -52
    li          $a3, 95
    FIR1_TAPS8  mult, $ac0, 0, 640
    FIR1_TAPS8  mult, $ac1, 4, 644

    // ---- coefficient group 2 ----
    li          $v0, -163
    li          $v1, 265
    li          $a2, -414
    li          $a3, 626
    FIR1_TAPS8  madd, $ac0, 32, 608
    FIR1_TAPS8  madd, $ac1, 36, 612

    // ---- coefficient group 3 ----
    li          $v0, -918
    li          $v1, 1316
    li          $a2, -1846
    li          $a3, 2542
    FIR1_TAPS8  madd, $ac0, 64, 576
    FIR1_TAPS8  madd, $ac1, 68, 580

    // ---- coefficient group 4 ----
    li          $v0, -3440
    li          $v1, 4587
    li          $a2, -6031
    li          $a3, 7831
    FIR1_TAPS8  madd, $ac0, 96, 544
    FIR1_TAPS8  madd, $ac1, 100, 548

    // ---- coefficient group 5 ----
    li          $v0, -10051
    li          $v1, 12765
    li          $a2, -16052
    li          $a3, 20003
    FIR1_TAPS8  madd, $ac0, 128, 512
    FIR1_TAPS8  madd, $ac1, 132, 516

    // ---- coefficient group 6 ----
    li          $v0, -24719
    li          $v1, 30312
    li          $a2, -36908
    li          $a3, 44651
    FIR1_TAPS8  madd, $ac0, 160, 480
    FIR1_TAPS8  madd, $ac1, 164, 484

    // ---- coefficient group 7 ----
    li          $v0, -53702
    li          $v1, 64253
    li          $a2, -76529
    li          $a3, 90803
    FIR1_TAPS8  madd, $ac0, 192, 448
    FIR1_TAPS8  madd, $ac1, 196, 452

    // ---- coefficient group 8 ----
    li          $v0, -107417
    li          $v1, 126805
    li          $a2, -149544
    li          $a3, 176416
    FIR1_TAPS8  madd, $ac0, 224, 416
    FIR1_TAPS8  madd, $ac1, 228, 420

    // ---- coefficient group 9 ----
    li          $v0, -208527
    li          $v1, 247500
    li          $a2, -295824
    li          $a3, 357532
    FIR1_TAPS8  madd, $ac0, 256, 384
    FIR1_TAPS8  madd, $ac1, 260, 388

    // ---- coefficient group 10 ----
    li          $v0, -439605
    li          $v1, 555269
    li          $a2, -732914
    li          $a3, 1046453
    FIR1_TAPS8  madd, $ac0, 288, 352
    FIR1_TAPS8  madd, $ac1, 292, 356

    // ---- coefficient group 11: the four centre taps, two coefficients ----
    li          $v0, -1767069
    li          $v1, 5335991
    // Left centre samples
    lw          $t0, 320($a0)
    lw          $t1, 328($a0)
    lw          $t2, 336($a0)
    lw          $t3, 344($a0)
    madd        $ac0, $t0, $v0
    madd        $ac0, $t1, $v1
    madd        $ac0, $t2, $v1
    madd        $ac0, $t3, $v0
    // Right centre samples
    lw          $t0, 324($a0)
    lw          $t1, 332($a0)
    lw          $t2, 340($a0)
    lw          $t3, 348($a0)
    madd        $ac1, $t0, $v0
    madd        $ac1, $t1, $v1
    madd        $ac1, $t2, $v1
    madd        $ac1, $t3, $v0

    // Interpolated output frame. The full kernel sums to 2^23, so the rounded
    // 25-bit right shift leaves a gain of 1/4.
    extr_r.w    $v0, $ac0, 25
    sw          $v0, 0($a1)             // pDst[0] = interpolated Left
    extr_r.w    $v1, $ac1, 25
    sw          $v1, 4($a1)             // pDst[1] = interpolated Right

    // Pass-through output frame: the centre input frame (byte offsets
    // 336/340), divided by 4 with rounding to match the gain above.
    lw          $t0, 336($a0)
    shra_r.w    $v0, $t0, 2
    sw          $v0, 8($a1)             // pDst[2] = Left / 4

    lw          $t0, 340($a0)
    shra_r.w    $v0, $t0, 2
    sw          $v0, 12($a1)            // pDst[3] = Right / 4

    // return to caller
    jr          $ra
    
    

    
    
    
    
    
    
    
    
    
    
    
    
/*
    fir2_5842 - FIR x2 half-band upsampler (2nd stage of SM5842)
     
    external declaration:     extern void fir2_5842(void *pSrc, void *pDst);

    input:
    pSrc - int32 source data pointer ($a0)

    output:
    pDst - int32 result destination data pointer ($a1)

    The source data array must contain dual-channel (interleaved Left/Right)
    int32 data samples. The code reads a window of 15 dual-channel samples
    (byte offsets 0..116 from pSrc).
    The function reads 2 dual-channel samples, upsamples them and returns
    4 dual-channel samples.
*/
fir2_5842:
    // Prologue: 32-byte frame for $s0-$s6, which hold the seven distinct
    // coefficients of the symmetric 14-tap kernel for the whole function.
    addiu       $sp, $sp, -32
    sw          $s0, 0($sp)
    sw          $s1, 4($sp)
    sw          $s2, 8($sp)
    sw          $s3, 12($sp)
    sw          $s4, 16($sp)
    sw          $s5, 20($sp)
    sw          $s6, 24($sp)

    // Half kernel, outermost tap ($s0) to innermost tap ($s6). The seven
    // values sum to 2^22, so the full kernel sums to 2^23 and the 23-bit
    // extraction used below gives unity gain.
    li          $s0, 1787
    li          $s1, -14786
    li          $s2, 66404
    li          $s3, -214470
    li          $s4, 569338
    li          $s5, -1425002
    li          $s6, 5211033

    // ================= Left channel, output pair 1 =================
    // 14 Left samples at byte offsets 0, 8, ..., 104
    lw          $t0, 0($a0)
    lw          $t1, 8($a0)
    lw          $t2, 16($a0)
    lw          $t3, 24($a0)
    lw          $t4, 32($a0)
    lw          $t5, 40($a0)
    lw          $t6, 48($a0)
    lw          $t7, 56($a0)
    lw          $t8, 64($a0)
    lw          $t9, 72($a0)
    lw          $v0, 80($a0)
    lw          $v1, 88($a0)
    lw          $a2, 96($a0)
    lw          $a3, 104($a0)
    // HI/LO = sum of sample * mirrored coefficient
    mult        $t0, $s0
    madd        $t1, $s1
    madd        $t2, $s2
    madd        $t3, $s3
    madd        $t4, $s4
    madd        $t5, $s5
    madd        $t6, $s6
    madd        $t7, $s6
    madd        $t8, $s5
    madd        $t9, $s4
    madd        $v0, $s3
    madd        $v1, $s2
    madd        $a2, $s1
    madd        $a3, $s0

    // pass-through sample: input offset 56 -> output offset 8
    lw          $t0, 56($a0)
    sw          $t0, 8($a1)

    // interpolated sample: rounded >>23 of the accumulator -> output offset 0
    extr_r.w    $t0, $ac0, 23
    sw          $t0, 0($a1)

    // ================= Left channel, output pair 2 =================
    // Window slides by one frame: $t1..$a3 are reused as taps 0..12 and the
    // newly loaded sample at offset 112 becomes tap 13.
    lw          $t0, 112($a0)
    mult        $t1, $s0
    madd        $t2, $s1
    madd        $t3, $s2
    madd        $t4, $s3
    madd        $t5, $s4
    madd        $t6, $s5
    madd        $t7, $s6
    madd        $t8, $s6
    madd        $t9, $s5
    madd        $v0, $s4
    madd        $v1, $s3
    madd        $a2, $s2
    madd        $a3, $s1
    madd        $t0, $s0

    // pass-through sample: input offset 64 -> output offset 24
    lw          $t1, 64($a0)
    sw          $t1, 24($a1)

    // interpolated sample -> output offset 16
    extr_r.w    $t1, $ac0, 23
    sw          $t1, 16($a1)

    // ================= Right channel, output pair 1 =================
    // 14 Right samples at byte offsets 4, 12, ..., 108
    lw          $t0, 4($a0)
    lw          $t1, 12($a0)
    lw          $t2, 20($a0)
    lw          $t3, 28($a0)
    lw          $t4, 36($a0)
    lw          $t5, 44($a0)
    lw          $t6, 52($a0)
    lw          $t7, 60($a0)
    lw          $t8, 68($a0)
    lw          $t9, 76($a0)
    lw          $v0, 84($a0)
    lw          $v1, 92($a0)
    lw          $a2, 100($a0)
    lw          $a3, 108($a0)
    mult        $t0, $s0
    madd        $t1, $s1
    madd        $t2, $s2
    madd        $t3, $s3
    madd        $t4, $s4
    madd        $t5, $s5
    madd        $t6, $s6
    madd        $t7, $s6
    madd        $t8, $s5
    madd        $t9, $s4
    madd        $v0, $s3
    madd        $v1, $s2
    madd        $a2, $s1
    madd        $a3, $s0

    // pass-through sample: input offset 60 -> output offset 12
    lw          $t0, 60($a0)
    sw          $t0, 12($a1)

    // interpolated sample -> output offset 4
    extr_r.w    $t0, $ac0, 23
    sw          $t0, 4($a1)

    // ================= Right channel, output pair 2 =================
    // Same one-frame slide as on the Left; new tap 13 comes from offset 116.
    lw          $t0, 116($a0)
    mult        $t1, $s0
    madd        $t2, $s1
    madd        $t3, $s2
    madd        $t4, $s3
    madd        $t5, $s4
    madd        $t6, $s5
    madd        $t7, $s6
    madd        $t8, $s6
    madd        $t9, $s5
    madd        $v0, $s4
    madd        $v1, $s3
    madd        $a2, $s2
    madd        $a3, $s1
    madd        $t0, $s0

    // pass-through sample: input offset 68 -> output offset 28
    lw          $t1, 68($a0)
    sw          $t1, 28($a1)

    // interpolated sample -> output offset 20
    extr_r.w    $t1, $ac0, 23
    sw          $t1, 20($a1)

    // Epilogue: reload $s0-$s6 and release the frame.
    lw          $s0, 0($sp)
    lw          $s1, 4($sp)
    lw          $s2, 8($sp)
    lw          $s3, 12($sp)
    lw          $s4, 16($sp)
    lw          $s5, 20($sp)
    lw          $s6, 24($sp)
    addiu       $sp, $sp, 32

    // return to caller
    jr          $ra

    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
/*
    fir3_5842 - FIR x2 half-band upsampler (3rd stage of SM5842)
     
    external declaration:     extern void fir3_5842(void *pSrc, void *pDst);

    input:
    pSrc - int32 source data pointer ($a0)

    output:
    pDst - int32 result destination data pointer ($a1)

    The source data array must contain dual-channel (interleaved Left/Right)
    int32 data samples. The code reads a window of 11 dual-channel samples
    (byte offsets 0..84 from pSrc).
    The function reads 4 dual-channel samples, upsamples them and returns
    8 dual-channel samples.
*/
fir3_5842:
    // Half of the symmetric 8-tap kernel, outermost ($v0) to innermost ($a3).
    // The four values sum to 2^22, so the full kernel sums to 2^23 and the
    // 23-bit extraction used below gives unity gain.
    li          $v0, -26591
    li          $v1, 229059
    li          $a2, -1052136
    li          $a3, 5043972

    // Each pass writes one interpolated sample (rounded >>23 of HI/LO) and
    // one pass-through sample, then slides the 8-sample window by one frame,
    // reloading only the newest sample into the register that just dropped
    // out of the window.

    // ================= Left channel, pass 1 =================
    // window = byte offsets 0..56
    lw          $t0, 0($a0)
    lw          $t1, 8($a0)
    lw          $t2, 16($a0)
    lw          $t3, 24($a0)
    lw          $t4, 32($a0)
    lw          $t5, 40($a0)
    lw          $t6, 48($a0)
    lw          $t7, 56($a0)
    mult        $t0, $v0
    madd        $t1, $v1
    madd        $t2, $a2
    madd        $t3, $a3
    madd        $t4, $a3
    madd        $t5, $a2
    madd        $t6, $v1
    madd        $t7, $v0

    lw          $t8, 32($a0)            // pass-through: in 32 -> out 8
    sw          $t8, 8($a1)

    extr_r.w    $t8, $ac0, 23           // interpolated -> out 0
    sw          $t8, 0($a1)

    // ================= Left channel, pass 2 =================
    // window = byte offsets 8..64, newest sample in $t0
    lw          $t0, 64($a0)
    mult        $t1, $v0
    madd        $t2, $v1
    madd        $t3, $a2
    madd        $t4, $a3
    madd        $t5, $a3
    madd        $t6, $a2
    madd        $t7, $v1
    madd        $t0, $v0

    lw          $t8, 40($a0)            // pass-through: in 40 -> out 24
    sw          $t8, 24($a1)

    extr_r.w    $t8, $ac0, 23           // interpolated -> out 16
    sw          $t8, 16($a1)

    // ================= Left channel, pass 3 =================
    // window = byte offsets 16..72, newest sample in $t1
    lw          $t1, 72($a0)
    mult        $t2, $v0
    madd        $t3, $v1
    madd        $t4, $a2
    madd        $t5, $a3
    madd        $t6, $a3
    madd        $t7, $a2
    madd        $t0, $v1
    madd        $t1, $v0

    lw          $t8, 48($a0)            // pass-through: in 48 -> out 40
    sw          $t8, 40($a1)

    extr_r.w    $t8, $ac0, 23           // interpolated -> out 32
    sw          $t8, 32($a1)

    // ================= Left channel, pass 4 =================
    // window = byte offsets 24..80, newest sample in $t2
    lw          $t2, 80($a0)
    mult        $t3, $v0
    madd        $t4, $v1
    madd        $t5, $a2
    madd        $t6, $a3
    madd        $t7, $a3
    madd        $t0, $a2
    madd        $t1, $v1
    madd        $t2, $v0

    lw          $t8, 56($a0)            // pass-through: in 56 -> out 56
    sw          $t8, 56($a1)

    extr_r.w    $t8, $ac0, 23           // interpolated -> out 48
    sw          $t8, 48($a1)

    // ================= Right channel, pass 1 =================
    // window = byte offsets 4..60
    lw          $t0, 4($a0)
    lw          $t1, 12($a0)
    lw          $t2, 20($a0)
    lw          $t3, 28($a0)
    lw          $t4, 36($a0)
    lw          $t5, 44($a0)
    lw          $t6, 52($a0)
    lw          $t7, 60($a0)
    mult        $t0, $v0
    madd        $t1, $v1
    madd        $t2, $a2
    madd        $t3, $a3
    madd        $t4, $a3
    madd        $t5, $a2
    madd        $t6, $v1
    madd        $t7, $v0

    lw          $t8, 36($a0)            // pass-through: in 36 -> out 12
    sw          $t8, 12($a1)

    extr_r.w    $t8, $ac0, 23           // interpolated -> out 4
    sw          $t8, 4($a1)

    // ================= Right channel, pass 2 =================
    // window = byte offsets 12..68, newest sample in $t0
    lw          $t0, 68($a0)
    mult        $t1, $v0
    madd        $t2, $v1
    madd        $t3, $a2
    madd        $t4, $a3
    madd        $t5, $a3
    madd        $t6, $a2
    madd        $t7, $v1
    madd        $t0, $v0

    lw          $t8, 44($a0)            // pass-through: in 44 -> out 28
    sw          $t8, 28($a1)

    extr_r.w    $t8, $ac0, 23           // interpolated -> out 20
    sw          $t8, 20($a1)

    // ================= Right channel, pass 3 =================
    // window = byte offsets 20..76, newest sample in $t1
    lw          $t1, 76($a0)
    mult        $t2, $v0
    madd        $t3, $v1
    madd        $t4, $a2
    madd        $t5, $a3
    madd        $t6, $a3
    madd        $t7, $a2
    madd        $t0, $v1
    madd        $t1, $v0

    lw          $t8, 52($a0)            // pass-through: in 52 -> out 44
    sw          $t8, 44($a1)

    extr_r.w    $t8, $ac0, 23           // interpolated -> out 36
    sw          $t8, 36($a1)

    // ================= Right channel, pass 4 =================
    // window = byte offsets 28..84, newest sample in $t2
    lw          $t2, 84($a0)
    mult        $t3, $v0
    madd        $t4, $v1
    madd        $t5, $a2
    madd        $t6, $a3
    madd        $t7, $a3
    madd        $t0, $a2
    madd        $t1, $v1
    madd        $t2, $v0

    lw          $t8, 60($a0)            // pass-through: in 60 -> out 60
    sw          $t8, 60($a1)

    extr_r.w    $t8, $ac0, 23           // interpolated -> out 52
    sw          $t8, 52($a1)


    // return to caller
    jr          $ra

    

