//************************************************************* // Name: sincos.S // Author: Larry Pyeatt // Date: 2/22/2014 //************************************************************* /* This is a version of the sin/cos functions that uses symmetry to enhance precision. The actual sin and cos routines convert the input to lie in the range 0 to pi/2, then pass it to the worker routine that computes the result. The result is then converted back to correspond with the original input. We calculate sin(x) using the first seven terms of the Taylor Series: sin(x) = x - x^3/3! + x^5/5! - x^7/7! + x^9/9! - ... and we calculate cos(x) using the relationship: cos(x) = sin(pi/2-x) We start by defining a helper function, which we call sinq. The sinq function calculates sin(x) for 0<=x<=pi/2. The input, x, must be an S(1,30) number. The factors of x that sinq will use are: x, x^3, x^5, x^7, x^9, x^11, and x^13. Dividing by (2n+1)! is changed to a multiply by a coefficient as we compute each term, we will add it to the sum, stored as an S(2,61). Therefore, we want the product of each power of x and its coefficient to be converted to an S(2,61) for the add. It turns out that this just requires a small shift. We build a table to decide how much to shift each product before adding it to the total. x^2 will be stored as an S(2,29), and x is given as an S(1,30). After multiplying x by x^2, we will shift left one bit, so the procedure is: x will be an S(1,30) - multiply by x^2 and shift left x^3 will be an S(3,28) - multiply by x^2 and shift left x^5 will be an S(5,26) - multiply by x^2 and shift left x^7 will be an S(7,24) - multiply by x^2 and shift left x^9 will be an S(9,22) - multiply by x^2 and shift left x^11 will be an S(11,20)- multiply by x^2 and shift left x^13 will be an S(13,18)- multiply by x^2 and shift left x^15 will be an S(15,16)- multiply by x^2 and shift left x^17 will be an S(17,14) */ /* The following table shows the constant coefficients needed for calculating each term. -1/3! = AAAAAAAA as an S(-2,32) 1/5! = 44444445 as an S(-6,32) -1/7! = 97F97F97 as an S(-12,32) 1/9! = 5C778E96 as an S(-18,32) -1/11! = 9466EA60 as an S(-25,32) 1/13! = 5849184F as an S(-32,32) -1/15! = 94603063 as an S(-40,32) 1/17! = 654B1DC1 as an S(-48,32) Combining the two tables of power and coefficient formats, we can now determine how much shift we need after each step in order to do all sums in S(2,61) format: power powerfmt coef coeffmt resultfmt right shift x S(1,30) * 1 (skip the multiply) 1 -> S(2,61) x^3 S(3,28) * -1/3! S(-2,32) = S(2,61) 0 -> S(2,61) x^5 S(5,26) * 1/5! S(-6,32) = S(0,63) 2 -> S(2,61) x^7 S(7,24) * -1/7! S(-12,32) = S(-4,64) 6 -> S(2,61) x^9 S(9,22) * 1/9! S(-18,32) = S(-8,64) 10-> S(2,61) x^11 S(11,20) * -1/11! S(-25,32) = S(-13,64) 15-> S(2,61) x^13 S(13,18) * 1/13! S(-32,32) = S(-18,64) 20-> S(2,61) x^15 S(15,16) * -1/15! S(-40,32) = S(-24,64) 26-> S(2,61) x^17 S(17,14) * 1/17! S(-48,32) = S(-30,64) 32-> S(2,61) Note: the last row has a shift of 32, which indicates that it can contribute no more than 1/2 bit of precision to the 32-bit result. */ .data .align 2 // We will define a few constants that may be useful .global pi pi: .word 0x3243F6A8 @ pi as an S(3,28) .global pi_2 pi_2: .word 0x1921FB54 @ pi/2 as an S(3,28) .global pi_x2 pi_x2: .word 0x6487ED51 @ 2*pi as an S(3,28) sintab: // This is the table of coefficients and shifts for sinq to use .word 0xAAAAAAAA, 0 @ -1/3! as an S(-2,32) .word 0x44444445, 2 @ 1/5! as an S(-6,32) .word 0x97F97F97, 6 @ -1/7! as an S(-12,32) .word 0x5C778E96, 10 @ 1/9! as an S(-18,32) .word 0x9466EA60, 15 @ -1/11! as an S(-25,32) .word 0x5849184F, 20 @ 1/13! as an S(-32,32) .word 0x94603063, 26 @ -1/15! as an S(-40,32) .word 0x654B1DC1, 32 @ 1/17! as an S(-48,32) .equ tablen,(.-sintab) @ set tablen to size of table @@ The '.' refers to the current address counter value. @@ Subtracting the address of sintab from the current @@ address gives the size of the table. .text //------------------------------------------------------------- // sinq(x) // input: x -> S(1,30) s.t. 0 <= x <= pi/2 // returns sin(x) -> S(1,30) sinq: stmfd sp!,{r4-r11,lr} @@ The first term in the Taylor series is simply x, so @@ convert x to an S(2,61) by doing an asr in 64 bits, @@ and use it to initialize the sum. mov r10, r0,lsl #31 @ low 32 bits of sum mov r11, r0,asr #1 @ high 32 bits of sum @@ r11:r10 now contains the sum (currently x) as an S(2,61) smull r2,r4,r0,r0 @ r4 will hold x^2. @@ We are going to convert x^2 to an S(2,28), and round it adds r2,r2,#0x40000000 @ Round the result up by adding 1 to @ the first bit that will be lost. addcs r4,r4,#1 @ Propagate the carry. lsl r4,r4,#1 @ Make room for one bit at LSB position orr r4,r4,r2,lsr#31 @ Copy top bit of r2 @@ r4 now contains x^2 as an S(2,28) mov r5,r0 @ r5 will keep x^(2n-1). @@ r5 now contains x as an S(1,30) ldr r6, =sintab @ get pointer to beginning of table add r7, r6, #tablen @ get pointer to end of table @@ We know that we will always execute the loop 6 times, @@ so we use a post-test loop. sloop: ldmia r6!,{r8,r9} @ Load two values from the table @@ r8 now has 1/(2n+1)! @@ r9 contains the correcting shift smull r0,r5,r4,r5 @ r5:r0 <- x^(2n+1) as an S(4,59) lsl r5,r5,#1 @ Shift and copy the MSB of the LSW to orr r5,r5,r0,lsr#31 @ LSB of MSW to convert to an S(3,60). @@ r5 now contains x^(2n+1) as an S(3,60) smulls r0,r1,r5,r8 @ multiply by reciprocal that we @ loaded earlier @@ if the result is negative, then add one bge noadd adds r0,r0,#1 adc r1,r1,#0 noadd: @@ Apply correcting right shift to make an S(2,61) @@ r9 was loaded from table earlier lsr r0,r0,r9 @ Make room in low word for some bits rsb r2,r9,#32 @ calculate inverse shift amount lsl r3,r1,r2 @ shift upper 32 bits to the left orr r0,r0,r3 @ paste bits into low word asr r1,r1,r9 @ shift upper word right @@ accumulate result in r10:r11 adds r10,r10,r0 adc r11,r11,r1 @@ check to see if there is another term to compute cmp r6, r7 blt sloop @ Repeat for every table entry @@ shift result left 1 bit and move to r0 lsl r11,r11,#1 orr r0,r11,r10,lsr #31 @@ return the result ldmfd sp!,{r4-r11,pc} //------------------------------------------------------------- // cos(x) NOTE: The cos(x) function does not return. // It is an alternate entry point to sin(x). // input: x -> S(3,28) // returns cos(x) -> S(3,28) .global cos cos: ldr r1,=pi_x2 @ load pointer to 2*pi ldr r1,[r1] @ load 2*pi cmp r0,#0 @ Add 2*pi to x if needed, to make addle r0,r0,r1 @ sure x does not become too negative. cosgood:ldr r1,=pi_2 @ load pointer to pi/2 ldr r1,[r1] @ load pi/2 sub r0,r1,r0 @ cos(x) = sin(pi/2-x) @@ now we just fall through into the sin function //------------------------------------------------------------- // sin(x) // input: x -> S(3,28) // returns sin(x) -> S(3,28) .global sin sin: stmfd sp!,{lr} ldr r1,=pi_2 @ r1 has pointer to pi/2 ldr r2,=pi @ r2 has pointer to pi ldr r3,=pi_x2 @ r3 has pointer to pi*2 ldr r1,[r1] @ r1 has pi/2 ldr r2,[r2] @ r2 has pi ldr r3,[r3] @ r3 has pi*2 @@ step 1: make sure x>=0.0 and x<=2pi negl: cmp r0,#0 @ while(x < 0) addlt r0,r0,r3 @ x = x + 2 * pi blt negl @ end while nonneg: cmp r0,r3 @ while(x > pi/2) subgt r0,r0,r3 @ x = x - 2 * pi bgt nonneg @ end while @@ step 2: find the quadrant and call sinq appropriately inrange:cmp r0,r1 bgt chkq2 @@ it is in the first quadrant... just shift and call sinq lsl r0,r0,#2 bl sinq b sin_done chkq2: cmp r0,r2 bgt chkq3 @@ it is in the second quadrant... mirror, shift, and call sinq sub r0,r2,r0 lsl r0,r0,#2 bl sinq b sin_done chkq3: add r1,r1,r2 @ we won't need pi/2 again cmp r0,r1 @ so use r1 to calculate 3pi/2 bgt chkq4 @@ it is in the third quadrant... rotate, shift, call sinq, @@ then complement the result sub r0,r0,r2 lsl r0,r0,#2 bl sinq rsb r0,r0,#0 b sin_done @@ it is in the fourth quadrant... rotate, mirror, shift, @@ call sinq, then complement the result chkq4: sub r0,r0,r2 sub r0,r2,r0 lsl r0,r0,#2 bl sinq rsb r0,r0,#0 sin_done: @@ shift result right 2 bits asr r0,r0,#2 @@ return the result ldmfd sp!,{pc} //-------------------------------------------------------------