Title: ECE6580 Lecture 9
Slide 1: ECE6580 Lecture 9
Slide 2: MyFirAsm.asm

.global _MyFirAsm;
_MyFirAsm:
    entry;
    dm(b2Save) = b2;
    dm(i2Save) = i2;
    l4 = reads(1);      // save length of filter in l4
    l2 = l4;            // save length in l2
    b4 = r8;            // pointer to states goes in b4 and i4
    b2 = r12;           // pointer to coefs goes in b2 and i2
Slide 3: Un-Rolled Loop

    f8 = f4;            // x needs to be in f8
    f1 = dm(i4,m5);     // fetch states[0], but do not inc i4
    dm(i4,m6) = f8;     // states[0] = x
    f2 = dm(i2,m6);     // fetch coefs[0]
    f4 = f1*f2;         // coefs[0]*states[0]
    f0 = f0 + f4;       // acc = acc + coefs[0]*states[0]
    f8 = dm(i4,m5);     // fetch states[1], but do not inc i4
    dm(i4,m6) = f1;     // states[1] = states[0]
    f2 = dm(i2,m6);     // fetch coefs[1]
    f4 = f8*f2;         // coefs[1]*states[1]
    f0 = f0 + f4;       // acc = acc + coefs[1]*states[1]
    f1 = dm(i4,m5);     // fetch states[2], but do not increment i4
    dm(i4,m6) = f8;     // states[2] = states[1]
    f2 = dm(i2,m6);     // fetch coefs[2]
    f4 = f1*f2;         // coefs[2]*states[2]
    f0 = f0 + f4;       // acc = acc + coefs[2]*states[2]
    f8 = dm(i4,m5);     // fetch states[3], but do not inc i4
    dm(i4,m6) = f1;     // states[3] = states[2]
Slide 4: How Can We Roll It Up?

    f8 = f4;            // x needs to be in f8
    f1 = dm(i4,m5);     // fetch states[0], but do not inc i4
    dm(i4,m6) = f8;     // states[0] = x
    f2 = dm(i2,m6);     // fetch coefs[0]
    f4 = f1*f2;         // coefs[0]*states[0]
    f0 = f0 + f4;       // acc = acc + coefs[0]*states[0]
    f8 = dm(i4,m5);     // fetch states[1], but do not inc i4
    dm(i4,m6) = f1;     // states[1] = states[0]
    f2 = dm(i2,m6);     // fetch coefs[1]
    f4 = f8*f2;         // coefs[1]*states[1]
    f0 = f0 + f4;       // acc = acc + coefs[1]*states[1]
    f1 = dm(i4,m5);     // fetch states[2], but do not increment i4
    dm(i4,m6) = f8;     // states[2] = states[1]
    f2 = dm(i2,m6);     // fetch coefs[2]
    f4 = f1*f2;         // coefs[2]*states[2]
    f0 = f0 + f4;       // acc = acc + coefs[2]*states[2]
    f8 = dm(i4,m5);     // fetch states[3], but do not inc i4
    dm(i4,m6) = f1;     // states[3] = states[2]
Slide 5: Rolled and Ready to Go

    f2 = dm(i2,m6);     // fetch coefs[0]
    f0 = f2*f4;         // acc = coefs[0]*x
    f8 = f4;
    lcntr = r2, do MyFirAsmEnd until lce;
        f1 = dm(i4,m5);     // fetch states[i], but do not inc i4
        dm(i4,m6) = f8;     // states[i] = states[i-1]
        f2 = dm(i2,m6);     // fetch coefs[i]
        f4 = f8*f2;         // coefs[i]*states[i]
        f0 = f0 + f4;       // acc = acc + coefs[i]*states[i]
        f8 = dm(i4,m5);     // fetch states[i], but do not inc i4
        dm(i4,m6) = f1;     // states[i+1] = states[i]
        f2 = dm(i2,m6);     // fetch coefs[i+1]
        f4 = f8*f2;         // coefs[i+1]*states[i+1]
MyFirAsmEnd:
        f0 = f0 + f4;       // acc = acc + coefs[i+1]*states[i+1]
Slide 6: Benchmark Numbers
- MyFir 8535 cycles
- MyFir 1076 cycles (optimized)
- MyFirAsm 1304 cycles