Untitled


SUBMITTED BY: Guest

DATE: Oct. 28, 2014, 4:29 a.m.

FORMAT: Text only

SIZE: 9.3 kB

HITS: 931

  1. //*************************************************************
  2. // Name: sincos.S
  3. // Author: Larry Pyeatt
  4. // Date: 2/22/2014
  5. //*************************************************************
  6. /*
  7. This is a version of the sin/cos functions that uses symmetry
  8. to enhance precision. The actual sin and cos routines convert
  9. the input to lie in the range 0 to pi/2, then pass it to the
  10. worker routine that computes the result. The result is then
  11. converted back to correspond with the original input.
  12. We calculate sin(x) using the first seven terms of the Taylor
  13. Series: sin(x) = x - x^3/3! + x^5/5! - x^7/7! + x^9/9! - ...
  14. and we calculate cos(x) using the relationship:
  15. cos(x) = sin(pi/2-x)
  16. We start by defining a helper function, which we call sinq.
  17. The sinq function calculates sin(x) for 0<=x<=pi/2. The
  18. input, x, must be an S(1,30) number. The factors of x that
  19. sinq will use are: x, x^3, x^5, x^7, x^9, x^11, and x^13.
  20. Dividing by (2n+1)! is changed to a multiply by a coefficient
  21. as we compute each term, we will add it to the sum, stored as
  22. an S(2,61). Therefore, we want the product of each power of x
  23. and its coefficient to be converted to an S(2,61) for the add.
  24. It turns out that this just requires a small shift.
  25. We build a table to decide how much to shift each product
  26. before adding it to the total. x^2 will be stored as an
  27. S(2,29), and x is given as an S(1,30). After multiplying x by
  28. x^2, we will shift left one bit, so the procedure is:
  29. x will be an S(1,30) - multiply by x^2 and shift left
  30. x^3 will be an S(3,28) - multiply by x^2 and shift left
  31. x^5 will be an S(5,26) - multiply by x^2 and shift left
  32. x^7 will be an S(7,24) - multiply by x^2 and shift left
  33. x^9 will be an S(9,22) - multiply by x^2 and shift left
  34. x^11 will be an S(11,20)- multiply by x^2 and shift left
  35. x^13 will be an S(13,18)- multiply by x^2 and shift left
  36. x^15 will be an S(15,16)- multiply by x^2 and shift left
  37. x^17 will be an S(17,14)
  38. */
  39. /*
  40. The following table shows the constant coefficients
  41. needed for calculating each term.
  42. -1/3! = AAAAAAAA as an S(-2,32)
  43. 1/5! = 44444445 as an S(-6,32)
  44. -1/7! = 97F97F97 as an S(-12,32)
  45. 1/9! = 5C778E96 as an S(-18,32)
  46. -1/11! = 9466EA60 as an S(-25,32)
  47. 1/13! = 5849184F as an S(-32,32)
  48. -1/15! = 94603063 as an S(-40,32)
  49. 1/17! = 654B1DC1 as an S(-48,32)
  50. Combining the two tables of power and coefficient formats, we
  51. can now determine how much shift we need after each step in
  52. order to do all sums in S(2,61) format:
  53. power powerfmt coef coeffmt resultfmt right shift
  54. x S(1,30) * 1 (skip the multiply) 1 -> S(2,61)
  55. x^3 S(3,28) * -1/3! S(-2,32) = S(2,61) 0 -> S(2,61)
  56. x^5 S(5,26) * 1/5! S(-6,32) = S(0,63) 2 -> S(2,61)
  57. x^7 S(7,24) * -1/7! S(-12,32) = S(-4,64) 6 -> S(2,61)
  58. x^9 S(9,22) * 1/9! S(-18,32) = S(-8,64) 10-> S(2,61)
  59. x^11 S(11,20) * -1/11! S(-25,32) = S(-13,64) 15-> S(2,61)
  60. x^13 S(13,18) * 1/13! S(-32,32) = S(-18,64) 20-> S(2,61)
  61. x^15 S(15,16) * -1/15! S(-40,32) = S(-24,64) 26-> S(2,61)
  62. x^17 S(17,14) * 1/17! S(-48,32) = S(-30,64) 32-> S(2,61)
  63. Note: the last row has a shift of 32, which indicates that
  64. it can contribute no more than 1/2 bit of precision
  65. to the 32-bit result.
  66. */
  67. .data
  68. .align 2
  69. // We will define a few constants that may be useful
  70. .global pi
  71. pi: .word 0x3243F6A8 @ pi as an S(3,28)
  72. .global pi_2
  73. pi_2: .word 0x1921FB54 @ pi/2 as an S(3,28)
  74. .global pi_x2
  75. pi_x2: .word 0x6487ED51 @ 2*pi as an S(3,28)
  76. sintab: // This is the table of coefficients and shifts for sinq to use
  77. .word 0xAAAAAAAA, 0 @ -1/3! as an S(-2,32)
  78. .word 0x44444445, 2 @ 1/5! as an S(-6,32)
  79. .word 0x97F97F97, 6 @ -1/7! as an S(-12,32)
  80. .word 0x5C778E96, 10 @ 1/9! as an S(-18,32)
  81. .word 0x9466EA60, 15 @ -1/11! as an S(-25,32)
  82. .word 0x5849184F, 20 @ 1/13! as an S(-32,32)
  83. .word 0x94603063, 26 @ -1/15! as an S(-40,32)
  84. .word 0x654B1DC1, 32 @ 1/17! as an S(-48,32)
  85. .equ tablen,(.-sintab) @ set tablen to size of table
  86. @@ The '.' refers to the current address counter value.
  87. @@ Subtracting the address of sintab from the current
  88. @@ address gives the size of the table.
  89. .text
  90. //-------------------------------------------------------------
  91. // sinq(x)
  92. // input: x -> S(1,30) s.t. 0 <= x <= pi/2
  93. // returns sin(x) -> S(1,30)
  94. sinq: stmfd sp!,{r4-r11,lr}
  95. @@ The first term in the Taylor series is simply x, so
  96. @@ convert x to an S(2,61) by doing an asr in 64 bits,
  97. @@ and use it to initialize the sum.
  98. mov r10, r0,lsl #31 @ low 32 bits of sum
  99. mov r11, r0,asr #1 @ high 32 bits of sum
  100. @@ r11:r10 now contains the sum (currently x) as an S(2,61)
  101. smull r2,r4,r0,r0 @ r4 will hold x^2.
  102. @@ We are going to convert x^2 to an S(2,28), and round it
  103. adds r2,r2,#0x40000000 @ Round the result up by adding 1 to
  104. @ the first bit that will be lost.
  105. addcs r4,r4,#1 @ Propagate the carry.
  106. lsl r4,r4,#1 @ Make room for one bit at LSB position
  107. orr r4,r4,r2,lsr#31 @ Copy top bit of r2
  108. @@ r4 now contains x^2 as an S(2,28)
  109. mov r5,r0 @ r5 will keep x^(2n-1).
  110. @@ r5 now contains x as an S(1,30)
  111. ldr r6, =sintab @ get pointer to beginning of table
  112. add r7, r6, #tablen @ get pointer to end of table
  113. @@ We know that we will always execute the loop 6 times,
  114. @@ so we use a post-test loop.
  115. sloop: ldmia r6!,{r8,r9} @ Load two values from the table
  116. @@ r8 now has 1/(2n+1)!
  117. @@ r9 contains the correcting shift
  118. smull r0,r5,r4,r5 @ r5:r0 <- x^(2n+1) as an S(4,59)
  119. lsl r5,r5,#1 @ Shift and copy the MSB of the LSW to
  120. orr r5,r5,r0,lsr#31 @ LSB of MSW to convert to an S(3,60).
  121. @@ r5 now contains x^(2n+1) as an S(3,60)
  122. smulls r0,r1,r5,r8 @ multiply by reciprocal that we
  123. @ loaded earlier
  124. @@ if the result is negative, then add one
  125. bge noadd
  126. adds r0,r0,#1
  127. adc r1,r1,#0
  128. noadd: @@ Apply correcting right shift to make an S(2,61)
  129. @@ r9 was loaded from table earlier
  130. lsr r0,r0,r9 @ Make room in low word for some bits
  131. rsb r2,r9,#32 @ calculate inverse shift amount
  132. lsl r3,r1,r2 @ shift upper 32 bits to the left
  133. orr r0,r0,r3 @ paste bits into low word
  134. asr r1,r1,r9 @ shift upper word right
  135. @@ accumulate result in r10:r11
  136. adds r10,r10,r0
  137. adc r11,r11,r1
  138. @@ check to see if there is another term to compute
  139. cmp r6, r7
  140. blt sloop @ Repeat for every table entry
  141. @@ shift result left 1 bit and move to r0
  142. lsl r11,r11,#1
  143. orr r0,r11,r10,lsr #31
  144. @@ return the result
  145. ldmfd sp!,{r4-r11,pc}
  146. //-------------------------------------------------------------
  147. // cos(x) NOTE: The cos(x) function does not return.
  148. // It is an alternate entry point to sin(x).
  149. // input: x -> S(3,28)
  150. // returns cos(x) -> S(3,28)
  151. .global cos
  152. cos: ldr r1,=pi_x2 @ load pointer to 2*pi
  153. ldr r1,[r1] @ load 2*pi
  154. cmp r0,#0 @ Add 2*pi to x if needed, to make
  155. addle r0,r0,r1 @ sure x does not become too negative.
  156. cosgood:ldr r1,=pi_2 @ load pointer to pi/2
  157. ldr r1,[r1] @ load pi/2
  158. sub r0,r1,r0 @ cos(x) = sin(pi/2-x)
  159. @@ now we just fall through into the sin function
  160. //-------------------------------------------------------------
  161. // sin(x)
  162. // input: x -> S(3,28)
  163. // returns sin(x) -> S(3,28)
  164. .global sin
  165. sin: stmfd sp!,{lr}
  166. ldr r1,=pi_2 @ r1 has pointer to pi/2
  167. ldr r2,=pi @ r2 has pointer to pi
  168. ldr r3,=pi_x2 @ r3 has pointer to pi*2
  169. ldr r1,[r1] @ r1 has pi/2
  170. ldr r2,[r2] @ r2 has pi
  171. ldr r3,[r3] @ r3 has pi*2
  172. @@ step 1: make sure x>=0.0 and x<=2pi
  173. negl: cmp r0,#0 @ while(x < 0)
  174. addlt r0,r0,r3 @ x = x + 2 * pi
  175. blt negl @ end while
  176. nonneg: cmp r0,r3 @ while(x > pi/2)
  177. subgt r0,r0,r3 @ x = x - 2 * pi
  178. bgt nonneg @ end while
  179. @@ step 2: find the quadrant and call sinq appropriately
  180. inrange:cmp r0,r1
  181. bgt chkq2
  182. @@ it is in the first quadrant... just shift and call sinq
  183. lsl r0,r0,#2
  184. bl sinq
  185. b sin_done
  186. chkq2: cmp r0,r2
  187. bgt chkq3
  188. @@ it is in the second quadrant... mirror, shift, and call sinq
  189. sub r0,r2,r0
  190. lsl r0,r0,#2
  191. bl sinq
  192. b sin_done
  193. chkq3: add r1,r1,r2 @ we won't need pi/2 again
  194. cmp r0,r1 @ so use r1 to calculate 3pi/2
  195. bgt chkq4
  196. @@ it is in the third quadrant... rotate, shift, call sinq,
  197. @@ then complement the result
  198. sub r0,r0,r2
  199. lsl r0,r0,#2
  200. bl sinq
  201. rsb r0,r0,#0
  202. b sin_done
  203. @@ it is in the fourth quadrant... rotate, mirror, shift,
  204. @@ call sinq, then complement the result
  205. chkq4: sub r0,r0,r2
  206. sub r0,r2,r0
  207. lsl r0,r0,#2
  208. bl sinq
  209. rsb r0,r0,#0
  210. sin_done:
  211. @@ shift result right 2 bits
  212. asr r0,r0,#2
  213. @@ return the result
  214. ldmfd sp!,{pc}
  215. //-------------------------------------------------------------

comments powered by Disqus