//*************************************************************
// Name: sincos.S
// Author: Larry Pyeatt
// Date: 2/22/2014
//*************************************************************
/*
This is a version of the sin/cos functions that uses symmetry
to enhance precision. The actual sin and cos routines convert
the input to lie in the range 0 to pi/2, then pass it to the
worker routine that computes the result. The result is then
converted back to correspond with the original input.
We calculate sin(x) using the first seven terms of the Taylor
Series: sin(x) = x - x^3/3! + x^5/5! - x^7/7! + x^9/9! - ...
and we calculate cos(x) using the relationship:
cos(x) = sin(pi/2-x)
We start by defining a helper function, which we call sinq.
The sinq function calculates sin(x) for 0<=x<=pi/2. The
input, x, must be an S(1,30) number. The factors of x that
sinq will use are: x, x^3, x^5, x^7, x^9, x^11, and x^13.
Dividing by (2n+1)! is changed to a multiply by a coefficient
as we compute each term, we will add it to the sum, stored as
an S(2,61). Therefore, we want the product of each power of x
and its coefficient to be converted to an S(2,61) for the add.
It turns out that this just requires a small shift.
We build a table to decide how much to shift each product
before adding it to the total. x^2 will be stored as an
S(2,29), and x is given as an S(1,30). After multiplying x by
x^2, we will shift left one bit, so the procedure is:
x will be an S(1,30) - multiply by x^2 and shift left
x^3 will be an S(3,28) - multiply by x^2 and shift left
x^5 will be an S(5,26) - multiply by x^2 and shift left
x^7 will be an S(7,24) - multiply by x^2 and shift left
x^9 will be an S(9,22) - multiply by x^2 and shift left
x^11 will be an S(11,20)- multiply by x^2 and shift left
x^13 will be an S(13,18)- multiply by x^2 and shift left
x^15 will be an S(15,16)- multiply by x^2 and shift left
x^17 will be an S(17,14)
*/
/*
The following table shows the constant coefficients
needed for calculating each term.
-1/3! = AAAAAAAA as an S(-2,32)
1/5! = 44444445 as an S(-6,32)
-1/7! = 97F97F97 as an S(-12,32)
1/9! = 5C778E96 as an S(-18,32)
-1/11! = 9466EA60 as an S(-25,32)
1/13! = 5849184F as an S(-32,32)
-1/15! = 94603063 as an S(-40,32)
1/17! = 654B1DC1 as an S(-48,32)
Combining the two tables of power and coefficient formats, we
can now determine how much shift we need after each step in
order to do all sums in S(2,61) format:
power powerfmt coef coeffmt resultfmt right shift
x S(1,30) * 1 (skip the multiply) 1 -> S(2,61)
x^3 S(3,28) * -1/3! S(-2,32) = S(2,61) 0 -> S(2,61)
x^5 S(5,26) * 1/5! S(-6,32) = S(0,63) 2 -> S(2,61)
x^7 S(7,24) * -1/7! S(-12,32) = S(-4,64) 6 -> S(2,61)
x^9 S(9,22) * 1/9! S(-18,32) = S(-8,64) 10-> S(2,61)
x^11 S(11,20) * -1/11! S(-25,32) = S(-13,64) 15-> S(2,61)
x^13 S(13,18) * 1/13! S(-32,32) = S(-18,64) 20-> S(2,61)
x^15 S(15,16) * -1/15! S(-40,32) = S(-24,64) 26-> S(2,61)
x^17 S(17,14) * 1/17! S(-48,32) = S(-30,64) 32-> S(2,61)
Note: the last row has a shift of 32, which indicates that
it can contribute no more than 1/2 bit of precision
to the 32-bit result.
*/
.data
.align 2
// We will define a few constants that may be useful
.global pi
pi: .word 0x3243F6A8 @ pi as an S(3,28)
.global pi_2
pi_2: .word 0x1921FB54 @ pi/2 as an S(3,28)
.global pi_x2
pi_x2: .word 0x6487ED51 @ 2*pi as an S(3,28)
sintab: // This is the table of coefficients and shifts for sinq to use
.word 0xAAAAAAAA, 0 @ -1/3! as an S(-2,32)
.word 0x44444445, 2 @ 1/5! as an S(-6,32)
.word 0x97F97F97, 6 @ -1/7! as an S(-12,32)
.word 0x5C778E96, 10 @ 1/9! as an S(-18,32)
.word 0x9466EA60, 15 @ -1/11! as an S(-25,32)
.word 0x5849184F, 20 @ 1/13! as an S(-32,32)
.word 0x94603063, 26 @ -1/15! as an S(-40,32)
.word 0x654B1DC1, 32 @ 1/17! as an S(-48,32)
.equ tablen,(.-sintab) @ set tablen to size of table
@@ The '.' refers to the current address counter value.
@@ Subtracting the address of sintab from the current
@@ address gives the size of the table.
.text
//-------------------------------------------------------------
// sinq(x)
// input: x -> S(1,30) s.t. 0 <= x <= pi/2
// returns sin(x) -> S(1,30)
sinq: stmfd sp!,{r4-r11,lr}
@@ The first term in the Taylor series is simply x, so
@@ convert x to an S(2,61) by doing an asr in 64 bits,
@@ and use it to initialize the sum.
mov r10, r0,lsl #31 @ low 32 bits of sum
mov r11, r0,asr #1 @ high 32 bits of sum
@@ r11:r10 now contains the sum (currently x) as an S(2,61)
smull r2,r4,r0,r0 @ r4 will hold x^2.
@@ We are going to convert x^2 to an S(2,28), and round it
adds r2,r2,#0x40000000 @ Round the result up by adding 1 to
@ the first bit that will be lost.
addcs r4,r4,#1 @ Propagate the carry.
lsl r4,r4,#1 @ Make room for one bit at LSB position
orr r4,r4,r2,lsr#31 @ Copy top bit of r2
@@ r4 now contains x^2 as an S(2,28)
mov r5,r0 @ r5 will keep x^(2n-1).
@@ r5 now contains x as an S(1,30)
ldr r6, =sintab @ get pointer to beginning of table
add r7, r6, #tablen @ get pointer to end of table
@@ We know that we will always execute the loop 6 times,
@@ so we use a post-test loop.
sloop: ldmia r6!,{r8,r9} @ Load two values from the table
@@ r8 now has 1/(2n+1)!
@@ r9 contains the correcting shift
smull r0,r5,r4,r5 @ r5:r0 <- x^(2n+1) as an S(4,59)
lsl r5,r5,#1 @ Shift and copy the MSB of the LSW to
orr r5,r5,r0,lsr#31 @ LSB of MSW to convert to an S(3,60).
@@ r5 now contains x^(2n+1) as an S(3,60)
smulls r0,r1,r5,r8 @ multiply by reciprocal that we
@ loaded earlier
@@ if the result is negative, then add one
bge noadd
adds r0,r0,#1
adc r1,r1,#0
noadd: @@ Apply correcting right shift to make an S(2,61)
@@ r9 was loaded from table earlier
lsr r0,r0,r9 @ Make room in low word for some bits
rsb r2,r9,#32 @ calculate inverse shift amount
lsl r3,r1,r2 @ shift upper 32 bits to the left
orr r0,r0,r3 @ paste bits into low word
asr r1,r1,r9 @ shift upper word right
@@ accumulate result in r10:r11
adds r10,r10,r0
adc r11,r11,r1
@@ check to see if there is another term to compute
cmp r6, r7
blt sloop @ Repeat for every table entry
@@ shift result left 1 bit and move to r0
lsl r11,r11,#1
orr r0,r11,r10,lsr #31
@@ return the result
ldmfd sp!,{r4-r11,pc}
//-------------------------------------------------------------
// cos(x) NOTE: The cos(x) function does not return.
// It is an alternate entry point to sin(x).
// input: x -> S(3,28)
// returns cos(x) -> S(3,28)
.global cos
cos: ldr r1,=pi_x2 @ load pointer to 2*pi
ldr r1,[r1] @ load 2*pi
cmp r0,#0 @ Add 2*pi to x if needed, to make
addle r0,r0,r1 @ sure x does not become too negative.
cosgood:ldr r1,=pi_2 @ load pointer to pi/2
ldr r1,[r1] @ load pi/2
sub r0,r1,r0 @ cos(x) = sin(pi/2-x)
@@ now we just fall through into the sin function
//-------------------------------------------------------------
// sin(x)
// input: x -> S(3,28)
// returns sin(x) -> S(3,28)
.global sin
sin: stmfd sp!,{lr}
ldr r1,=pi_2 @ r1 has pointer to pi/2
ldr r2,=pi @ r2 has pointer to pi
ldr r3,=pi_x2 @ r3 has pointer to pi*2
ldr r1,[r1] @ r1 has pi/2
ldr r2,[r2] @ r2 has pi
ldr r3,[r3] @ r3 has pi*2
@@ step 1: make sure x>=0.0 and x<=2pi
negl: cmp r0,#0 @ while(x < 0)
addlt r0,r0,r3 @ x = x + 2 * pi
blt negl @ end while
nonneg: cmp r0,r3 @ while(x > pi/2)
subgt r0,r0,r3 @ x = x - 2 * pi
bgt nonneg @ end while
@@ step 2: find the quadrant and call sinq appropriately
inrange:cmp r0,r1
bgt chkq2
@@ it is in the first quadrant... just shift and call sinq
lsl r0,r0,#2
bl sinq
b sin_done
chkq2: cmp r0,r2
bgt chkq3
@@ it is in the second quadrant... mirror, shift, and call sinq
sub r0,r2,r0
lsl r0,r0,#2
bl sinq
b sin_done
chkq3: add r1,r1,r2 @ we won't need pi/2 again
cmp r0,r1 @ so use r1 to calculate 3pi/2
bgt chkq4
@@ it is in the third quadrant... rotate, shift, call sinq,
@@ then complement the result
sub r0,r0,r2
lsl r0,r0,#2
bl sinq
rsb r0,r0,#0
b sin_done
@@ it is in the fourth quadrant... rotate, mirror, shift,
@@ call sinq, then complement the result
chkq4: sub r0,r0,r2
sub r0,r2,r0
lsl r0,r0,#2
bl sinq
rsb r0,r0,#0
sin_done:
@@ shift result right 2 bits
asr r0,r0,#2
@@ return the result
ldmfd sp!,{pc}
//-------------------------------------------------------------