diff options
| -rw-r--r-- | compiler-rt/lib/arm/comparesf2.S | 17 | ||||
| -rw-r--r-- | compiler-rt/lib/arm/switch16.S | 3 | ||||
| -rw-r--r-- | compiler-rt/lib/arm/switch32.S | 5 | ||||
| -rw-r--r-- | compiler-rt/lib/arm/switch8.S | 3 | ||||
| -rw-r--r-- | compiler-rt/lib/arm/switchu8.S | 3 | ||||
| -rw-r--r-- | compiler-rt/lib/arm/udivmodsi4.S | 3 | ||||
| -rw-r--r-- | compiler-rt/lib/arm/udivsi3.S | 3 | ||||
| -rw-r--r-- | compiler-rt/lib/arm/umodsi3.S | 3 |
8 files changed, 33 insertions, 7 deletions
diff --git a/compiler-rt/lib/arm/comparesf2.S b/compiler-rt/lib/arm/comparesf2.S index ee18203392d..ce6f4b9efd0 100644 --- a/compiler-rt/lib/arm/comparesf2.S +++ b/compiler-rt/lib/arm/comparesf2.S @@ -59,12 +59,14 @@ DEFINE_COMPILERRT_FUNCTION(__nesf2) // Next, we check if a and b have the same or different signs. If they have // opposite signs, this eor will set the N flag. + it ne eorsne r12, r0, r1 // If a and b are equal (either both zeros or bit identical; again, we're // ignoring NaNs for now), this subtract will zero out r0. If they have the // same sign, the flags are updated as they would be for a comparison of the // absolute values of a and b. + it pl subspl r0, r2, r3 // If a is smaller in magnitude than b and both have the same sign, place @@ -77,23 +79,27 @@ DEFINE_COMPILERRT_FUNCTION(__nesf2) // still clear from the shift argument in orrs; if a is positive and b // negative, this places 0 in r0; if a is negative and b positive, -1 is // placed in r0. + it lo mvnlo r0, r1, asr #31 // If a is greater in magnitude than b and both have the same sign, place // the sign of b in r0. Thus, if both are negative and a < b, -1 is placed // in r0, which is the desired result. Conversely, if both are positive // and a > b, zero is placed in r0. + it hi movhi r0, r1, asr #31 // If you've been keeping track, at this point r0 contains -1 if a < b and // 0 if a >= b. All that remains to be done is to set it to 1 if a > b. // If a == b, then the Z flag is set, so we can get the correct final value // into r0 by simply or'ing with 1 if Z is clear. - orrne r0, r0, #1 + it ne + orrne r0, r0, #1 // Finally, we need to deal with NaNs. If either argument is NaN, replace // the value in r0 with 1. cmp r2, #0xff000000 + ite ls cmpls r3, #0xff000000 movhi r0, #1 bx lr @@ -108,12 +114,18 @@ DEFINE_COMPILERRT_FUNCTION(__gtsf2) mov r2, r0, lsl #1 mov r3, r1, lsl #1 orrs r12, r2, r3, lsr #1 + it ne eorsne r12, r0, r1 + it pl subspl r0, r2, r3 + it lo mvnlo r0, r1, asr #31 + it hi movhi r0, r1, asr #31 - orrne r0, r0, #1 + it ne + orrne r0, r0, #1 cmp r2, #0xff000000 + ite ls cmpls r3, #0xff000000 movhi r0, #-1 bx lr @@ -125,6 +137,7 @@ DEFINE_COMPILERRT_FUNCTION(__unordsf2) mov r3, r1, lsl #1 mov r0, #0 cmp r2, #0xff000000 + ite ls cmpls r3, #0xff000000 movhi r0, #1 bx lr diff --git a/compiler-rt/lib/arm/switch16.S b/compiler-rt/lib/arm/switch16.S index e8f08c49c5d..9c3f0cf9915 100644 --- a/compiler-rt/lib/arm/switch16.S +++ b/compiler-rt/lib/arm/switch16.S @@ -34,8 +34,9 @@ DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch16) ldrh ip, [lr, #-1] // get first 16-bit word in table cmp r0, ip // compare with index add r0, lr, r0, lsl #1 // compute address of element in table - ldrshcc r0, [r0, #1] // load 16-bit element if r0 is in range add ip, lr, ip, lsl #1 // compute address of last element in table + ite lo + ldrshlo r0, [r0, #1] // load 16-bit element if r0 is in range ldrshhs r0, [ip, #1] // load 16-bit element if r0 out of range add ip, lr, r0, lsl #1 // compute label = lr + element*2 bx ip // jump to computed label diff --git a/compiler-rt/lib/arm/switch32.S b/compiler-rt/lib/arm/switch32.S index 7008fccb18e..3152dfa1d0b 100644 --- a/compiler-rt/lib/arm/switch32.S +++ b/compiler-rt/lib/arm/switch32.S @@ -34,9 +34,10 @@ DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch32) ldr ip, [lr, #-1] // get first 32-bit word in table cmp r0, ip // compare with index add r0, lr, r0, lsl #2 // compute address of element in table - ldrcc r0, [r0, #3] // load 32-bit element if r0 is in range add ip, lr, ip, lsl #2 // compute address of last element in table - ldrcs r0, [ip, #3] // load 32-bit element if r0 out of range + ite lo + ldrlo r0, [r0, #3] // load 32-bit element if r0 is in range + ldrhs r0, [ip, #3] // load 32-bit element if r0 out of range add ip, lr, r0 // compute label = lr + element bx ip // jump to computed label diff --git a/compiler-rt/lib/arm/switch8.S b/compiler-rt/lib/arm/switch8.S index e784b4082e1..15729ebc316 100644 --- a/compiler-rt/lib/arm/switch8.S +++ b/compiler-rt/lib/arm/switch8.S @@ -33,7 +33,8 @@ DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch8) ldrb ip, [lr, #-1] // get first byte in table cmp r0, ip // signed compare with index - ldrsbcc r0, [lr, r0] // get indexed byte out of table + ite lo + ldrsblo r0, [lr, r0] // get indexed byte out of table ldrsbhs r0, [lr, ip] // if out of range, use last entry in table add ip, lr, r0, lsl #1 // compute label = lr + element*2 bx ip // jump to computed label diff --git a/compiler-rt/lib/arm/switchu8.S b/compiler-rt/lib/arm/switchu8.S index 19bed2f664d..0a4efac88ab 100644 --- a/compiler-rt/lib/arm/switchu8.S +++ b/compiler-rt/lib/arm/switchu8.S @@ -33,7 +33,8 @@ DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switchu8) ldrb ip, [lr, #-1] // get first byte in table cmp r0, ip // compare with index - ldrbcc r0, [lr, r0] // get indexed byte out of table + ite lo + ldrblo r0, [lr, r0] // get indexed byte out of table ldrbhs r0, [lr, ip] // if out of range, use last entry in table add ip, lr, r0, lsl #1 // compute label = lr + element*2 bx ip // jump to computed label diff --git a/compiler-rt/lib/arm/udivmodsi4.S b/compiler-rt/lib/arm/udivmodsi4.S index 5fe53fe5fd0..aee2776671f 100644 --- a/compiler-rt/lib/arm/udivmodsi4.S +++ b/compiler-rt/lib/arm/udivmodsi4.S @@ -74,14 +74,17 @@ LOCAL_LABEL(mainLoop): // this way, we can merge the two branches which is a substantial win for // such a tight loop on current ARM architectures. subs r, a, b, lsl i + itt hs orrhs q, q,one, lsl i movhs a, r + it ne subsne i, i, #1 bhi LOCAL_LABEL(mainLoop) // Do the final test subtraction and update of quotient (i == 0), as it is // not performed in the main loop. subs r, a, b + itt hs orrhs q, #1 movhs a, r diff --git a/compiler-rt/lib/arm/udivsi3.S b/compiler-rt/lib/arm/udivsi3.S index 1c158251080..2bb14123ca3 100644 --- a/compiler-rt/lib/arm/udivsi3.S +++ b/compiler-rt/lib/arm/udivsi3.S @@ -73,14 +73,17 @@ LOCAL_LABEL(mainLoop): // this way, we can merge the two branches which is a substantial win for // such a tight loop on current ARM architectures. subs r, a, b, lsl i + itt hs orrhs q, q,one, lsl i movhs a, r + it ne subsne i, i, #1 bhi LOCAL_LABEL(mainLoop) // Do the final test subtraction and update of quotient (i == 0), as it is // not performed in the main loop. subs r, a, b + it hs orrhs q, #1 LOCAL_LABEL(return): diff --git a/compiler-rt/lib/arm/umodsi3.S b/compiler-rt/lib/arm/umodsi3.S index 188edf30400..092a4f1a206 100644 --- a/compiler-rt/lib/arm/umodsi3.S +++ b/compiler-rt/lib/arm/umodsi3.S @@ -57,13 +57,16 @@ LOCAL_LABEL(mainLoop): // this way, we can merge the two branches which is a substantial win for // such a tight loop on current ARM architectures. subs r, a, b, lsl i + it hs movhs a, r + it ne subsne i, i, #1 bhi LOCAL_LABEL(mainLoop) // Do the final test subtraction and update of remainder (i == 0), as it is // not performed in the main loop. subs r, a, b + it hs movhs a, r bx lr #endif |

